From f786fe4d6e619e05e52abc66213bb243e291acda Mon Sep 17 00:00:00 2001
From: Manupa Karunaratne <manupa.karunaratne@arm.com>
Date: Thu, 22 Apr 2021 19:23:06 +0100
Subject: [PATCH] Improved MLF to contain workspace info

Added functionality to calculate the workspace, I/O and constant
memory required by each primfunc and by the main function. Moreover,
the workspace information required by each primfunc and by main
is reported in metadata.json in the Model Library Format (MLF).
- added functionality to record TIR and Relay primfuncs
- added tests for model_library_format changes

Change-Id: Ib4a8b787345aa35f8a1645e8a648fad84de37bce
---
 python/tvm/micro/model_library_format.py      |  94 ++++++++-
 python/tvm/relay/backend/__init__.py          |   1 +
 python/tvm/relay/backend/_ffi_api.py          |  21 ++
 .../relay/backend/graph_executor_factory.py   |   7 +-
 python/tvm/relay/backend/utils.py             |  29 +++
 python/tvm/relay/build_module.py              |  10 +-
 src/relay/backend/build_module.cc             |   8 +
 src/relay/backend/graph_executor_codegen.cc   | 197 ++++++++++++++++--
 src/relay/backend/utils.cc                    |  44 ++++
 src/relay/backend/utils.h                     |  25 +++
 .../test_micro_model_library_format.py        |  99 ++++++++-
 11 files changed, 507 insertions(+), 28 deletions(-)
 create mode 100644 python/tvm/relay/backend/_ffi_api.py
 create mode 100644 python/tvm/relay/backend/utils.py
 create mode 100644 src/relay/backend/utils.cc

diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py
index 6768e03f4473f..8d6ce9a4d8f72 100644
--- a/python/tvm/micro/model_library_format.py
+++ b/python/tvm/micro/model_library_format.py
@@ -27,6 +27,8 @@
 from ..relay.backend import graph_executor_factory
 from ..relay import param_dict
 
+MAIN_FUNC_NAME_STR = "main_func"  # key of the main function's entry; mirrors kMainFuncStr in C++
+
 
 class UnsupportedInModelLibraryFormatError(Exception):
     """Raised when export_model_library_format does not support the given Module tree."""
@@ -73,8 +75,16 @@ def _populate_codegen_dir(mod, codegen_dir: str):
         dso_mod.save(file_name)
 
 
-def _build_memory_map(graph_json):
-    """Build a simpler memory map from graph JSON.
+def _build_memory_map(graph_json, function_metadata):
+    """Build the MLF memory section: storage-id map plus per-function memory map."""
+    return {
+        "sids": _build_sid_map(graph_json),
+        "functions": _build_function_memory_map(function_metadata),
+    }
+
+
+def _build_sid_map(graph_json):
+    """Build a simpler storage id info map from graph JSON.
 
     Parameters
     ----------
@@ -117,6 +127,82 @@ def _build_memory_map(graph_json):
     return memory_map
 
 
+def _build_function_memory_map(function_metadata):
+    """Build a simple map that shows how much workspace is required to execute
+    each primitive function. The main_func describes how much memory is required
+    to execute the main control code.
+
+    Parameters
+    ----------
+    function_metadata : Map<String, FunctionInfo>
+        This contains all the compiled metadata on a function basis
+
+    Returns
+    -------
+    dict :
+        This will have two entries:
+        1.) "operator_functions": one entry per function with its workspace per target.
+        2.) "main_function": the requirement per target if all functions run sequentially.
+    """
+    device_max_workspace = dict()
+    main_finfo = function_metadata[MAIN_FUNC_NAME_STR]
+    num_targets = len(main_finfo.workspace_sizes.items())
+    func_entries = []
+    target_local_entries = dict()
+    for i in range(num_targets):
+        for func_name, finfo in function_metadata.items():
+            if func_name == MAIN_FUNC_NAME_STR:
+                continue
+            target = finfo.workspace_sizes.items()[i][0]
+            # Initialise only once: resetting these on every target iteration
+            # would discard the entries and maxima recorded for earlier targets.
+            device_max_workspace.setdefault(target, 0)
+            target_local_entries.setdefault(func_name, [])
+
+        for func_name, finfo in function_metadata.items():
+            if func_name == MAIN_FUNC_NAME_STR:
+                continue
+            assert len(finfo.constant_sizes.items()) == num_targets
+            assert len(finfo.io_sizes.items()) == num_targets
+            target = finfo.workspace_sizes.items()[i][0]
+            workspace_size = finfo.workspace_sizes.items()[i][1]
+            target_entry = {
+                "device": int(target.kind.device_type),
+                "workspace_size_bytes": int(workspace_size),
+            }
+            target_local_entries[func_name].append(target_entry)
+            if workspace_size > device_max_workspace[target]:
+                device_max_workspace[target] = workspace_size
+
+    for func_name, target_entries_ in target_local_entries.items():
+        func_entry = {
+            "function_name": str(func_name),
+            "workspace": target_entries_,
+        }
+        func_entries.append(func_entry)
+
+    target_main_entries = list()
+    for i in range(num_targets):
+        target, main_func_local_workspace = main_finfo.workspace_sizes.items()[i]
+        main_func_constants = main_finfo.constant_sizes.items()[i][1]
+        main_func_io = main_finfo.io_sizes.items()[i][1]
+        target_main_entries.append(
+            {
+                "device": int(target.kind.device_type),
+                "workspace_size_bytes": int(device_max_workspace[target])
+                + int(main_func_local_workspace),
+                "constants_size_bytes": int(main_func_constants),
+                "io_size_bytes": int(main_func_io),
+            }
+        )
+
+    ret = {
+        "operator_functions": func_entries,
+        "main_function": target_main_entries,
+    }
+    return ret
+
+
 def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactoryModule, file_name):
     """Export the build artifact in Model Library Format.
 
@@ -133,10 +219,10 @@ def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactory
     """
     tempdir = utils.tempdir()
     metadata = {
-        "version": 1,
+        "version": 2,
         "model_name": mod.libmod_name,
         "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"),
-        "memory": _build_memory_map(mod.graph_json),
+        "memory": _build_memory_map(mod.graph_json, mod.function_metadata),
         "target": {int(k): str(v) for k, v in mod.target.items()},
         "runtimes": ["graph"],
     }
diff --git a/python/tvm/relay/backend/__init__.py b/python/tvm/relay/backend/__init__.py
index 4fc2b63748db5..f4d911a22bfe2 100644
--- a/python/tvm/relay/backend/__init__.py
+++ b/python/tvm/relay/backend/__init__.py
@@ -16,3 +16,4 @@
 # under the License.
 """Backend codegen modules for relay."""
 from . import compile_engine
+from . import utils
diff --git a/python/tvm/relay/backend/_ffi_api.py b/python/tvm/relay/backend/_ffi_api.py
new file mode 100644
index 0000000000000..2d27709aee0ba
--- /dev/null
+++ b/python/tvm/relay/backend/_ffi_api.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""FFI APIs for tvm.relay.backend"""
+import tvm._ffi
+
+
+tvm._ffi._init_api("relay.backend", __name__)
diff --git a/python/tvm/relay/backend/graph_executor_factory.py b/python/tvm/relay/backend/graph_executor_factory.py
index d6959d22e5c87..f479c0506a1e7 100644
--- a/python/tvm/relay/backend/graph_executor_factory.py
+++ b/python/tvm/relay/backend/graph_executor_factory.py
@@ -39,9 +39,13 @@ class GraphExecutorFactoryModule:
         The name of module
     params : dict of str to NDArray
         The parameters of module
+    function_metadata : Map of String to FunctionInfo
+        This holds a map of function names to their information
     """
 
-    def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params):
+    def __init__(
+        self, ir_mod, target, graph_json_str, libmod, libmod_name, params, function_metadata
+    ):
         assert isinstance(graph_json_str, string_types)
         fcreate = get_global_func("tvm.graph_executor_factory.create")
         args = []
@@ -56,6 +60,7 @@ def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params):
         self.libmod_name = libmod_name
         self.params = params
         self.iter_cnt = 0
+        self.function_metadata = function_metadata
 
     def export_library(self, file_name, fcompile=None, addons=None, **kwargs):
         return self.module.export_library(file_name, fcompile, addons, **kwargs)
diff --git a/python/tvm/relay/backend/utils.py b/python/tvm/relay/backend/utils.py
new file mode 100644
index 0000000000000..f281f9d57ab88
--- /dev/null
+++ b/python/tvm/relay/backend/utils.py
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""The utility functions and classes for relay backend compilation"""
+from tvm.runtime import Object
+from . import _ffi_api
+
+
+class FunctionInfo(Object):
+    """A data structure to hold metadata of relay primitive functions (FFI-backed object)"""
+
+    def __init__(self, dummy):  # NOTE(review): the FFI constructor is registered with no args
+        self.__init_handle_by_constructor__(_ffi_api.FunctionInfo, dummy)
+
+    def set_workspace_size(self, target, size):  # forwards to FunctionInfo::SetWorkspaceSize
+        _ffi_api._FunctionInfo_SetWorkspaceSize(self, target, size)
diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py
index ed59ad9bdc8f9..35a52afb8b7bb 100644
--- a/python/tvm/relay/build_module.py
+++ b/python/tvm/relay/build_module.py
@@ -83,6 +83,7 @@ def __init__(self):
         self._optimize = self.mod["optimize"]
         self._set_params_func = self.mod["set_params"]
         self._get_params_func = self.mod["get_params"]
+        self._get_function_metadata = self.mod["get_function_metadata"]
 
     def build(self, mod, target=None, target_host=None, params=None):
         """
@@ -189,6 +190,12 @@ def get_module(self):
         """Return the built module."""
         return self._get_module()
 
+    def get_function_metadata(self):
+        """Return the compiled function metadata.
+        This is the map from function name to FunctionInfo (workspace, I/O and
+        constant sizes plus the recorded primfuncs) gathered during codegen."""
+        return self._get_function_metadata()
+
     def get_params(self):
         """Return the updated weights."""
         params = self._get_params_func()
@@ -288,8 +295,9 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"
     with tophub_context:
         bld_mod = BuildModule()
         graph_json, runtime_mod, params = bld_mod.build(mod=ir_mod, target=target, params=params)
+        func_metadata = bld_mod.get_function_metadata()
         executor_factory = _graph_executor_factory.GraphExecutorFactoryModule(
-            ir_mod, target, graph_json, runtime_mod, mod_name, params
+            ir_mod, target, graph_json, runtime_mod, mod_name, params, func_metadata
         )
         return executor_factory
 
diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc
index 07bb51150bee5..7b11061ac96df 100644
--- a/src/relay/backend/build_module.cc
+++ b/src/relay/backend/build_module.cc
@@ -75,6 +75,10 @@ struct GraphCodegen {
     return CallFunc<Array<tvm::runtime::Module>>("get_external_modules", nullptr);
   }
 
+  Map<String, FunctionInfo> GetFunctionMetadata() {  // function name -> FunctionInfo from codegen
+    return CallFunc<Map<String, FunctionInfo>>("get_function_metadata", nullptr);
+  }
+
   Map<String, IRModule> GetIRModule() {
     return CallFunc<Map<String, IRModule>>("get_irmodule", nullptr);
   }
@@ -161,6 +165,10 @@ class RelayBuildModule : public runtime::ModuleNode {
       return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
         *rv = this->graph_codegen_->GetExternalModules();
       });
+    } else if (name == "get_function_metadata") {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        *rv = this->graph_codegen_->GetFunctionMetadata();
+      });
     } else if (name == "optimize") {
       return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
         ICHECK_EQ(args.num_args, 2);
diff --git a/src/relay/backend/graph_executor_codegen.cc b/src/relay/backend/graph_executor_codegen.cc
index 72989b5ba46aa..1c0e74ae0133a 100644
--- a/src/relay/backend/graph_executor_codegen.cc
+++ b/src/relay/backend/graph_executor_codegen.cc
@@ -27,6 +27,8 @@
 #include <tvm/ir/module.h>
 #include <tvm/relay/expr_functor.h>
 #include <tvm/runtime/device_api.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/function.h>
 
 #include <list>
 #include <string>
@@ -56,6 +58,7 @@ struct LoweredOutput {
   std::string graph_json;
   Map<String, IRModule> lowered_funcs;
   Array<tvm::runtime::Module> external_mods;
+  Map<String, FunctionInfo> function_metadata;
   std::unordered_map<std::string, std::pair<int, const tvm::runtime::NDArray>> params;
 };
 
@@ -189,9 +192,102 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
     targets_ = targets;
   }
 
+  /*!
+   * \brief Calculate the storage required to store the type of relay.Expr
+   *
+   * \param expr_type The relay type to size; tuple types are summed over their fields
+   * \return The size in bytes; a tensor with a dynamic dimension contributes 0
+   */
+  int64_t CalculateRelayExprSizeBytes(const Type& expr_type) {
+    if (expr_type->IsInstance<TupleTypeNode>()) {
+      auto tuple_type = Downcast<TupleType>(expr_type);
+      int64_t size = 0;
+      for (const auto& field : tuple_type->fields) {
+        size += CalculateRelayExprSizeBytes(field);
+      }
+      return size;
+    }
+    auto tensor_type = expr_type.as<TensorTypeNode>();
+    ICHECK(tensor_type != nullptr) << "Unsupported type: " << expr_type->GetTypeKey();
+    int64_t num_of_elements = 1;  // 64-bit so large tensors do not overflow before widening
+    for (const auto& dim_index_expr : tensor_type->shape) {
+      if (dim_index_expr->IsInstance<IntImmNode>()) {
+        num_of_elements *= dim_index_expr.as<IntImmNode>()->value;
+      } else {
+        // If shape is dynamic, we cannot calculate workspace in compile time.
+        num_of_elements = 0;
+      }
+    }
+    return tensor_type->dtype.bytes() * num_of_elements;
+  }
+
+  /*!
+   * \brief Update the "main" control function's metadata
+   *
+   * \param func The main function that contains calls to relay primitive functions
+   */
+  void UpdateMainWorkspaceSize(const Function& func) {
+    std::unordered_map<int, std::unordered_map<int, int>> sid_workspace;  // device -> sid -> bytes
+    std::unordered_map<int, int> device_workspace;  // device -> total bytes over its sids
+    uint64_t params_size = 0;
+    uint64_t input_size = 0;
+    uint64_t output_size = 0;
+    // Pass 1: register every (device, storage id) pair with a size of zero.
+    for (const auto& kv : storage_device_map_) {
+      auto sids = kv.second[0];
+      auto devices = kv.second[1];
+      CHECK_EQ(sids.size(), devices.size());
+      for (uint32_t i = 0; i < sids.size(); i++) {
+        sid_workspace[devices[i]][sids[i]] = 0;
+      }
+    }
+    // Pass 2: each storage id keeps the size of the largest expression mapped to it.
+    for (const auto& kv : storage_device_map_) {
+      auto size_bytes = CalculateRelayExprSizeBytes(kv.first->checked_type());
+      if (kv.first->IsInstance<ConstantNode>()) {
+        params_size += size_bytes;  // constants are tallied separately, not as workspace
+        continue;
+      } else if (kv.first->IsInstance<VarNode>()) {
+        input_size += size_bytes;  // variables are tallied as inputs
+        continue;
+      } else if (kv.first == func->body) {
+        output_size += size_bytes;  // the function body is tallied as the output
+        continue;
+      }
+      auto sids = kv.second[0];
+      auto devices = kv.second[1];
+      for (uint32_t i = 0; i < sids.size(); i++) {
+        if (size_bytes > sid_workspace[devices[i]][sids[i]]) {
+          sid_workspace[devices[i]][sids[i]] = size_bytes;
+        }
+      }
+    }
+    // Sum the per-storage-id sizes into one workspace figure per device.
+    for (const auto& dev_sid_size : sid_workspace) {
+      auto dev = dev_sid_size.first;
+      device_workspace[dev] = 0;
+      for (const auto& sid_size : dev_sid_size.second) {
+        device_workspace[dev] += sid_size.second;
+      }
+    }
+    // Publish the totals keyed by the Target resolved for each device type.
+    auto fi_node = make_object<FunctionInfoNode>();
+    for (const auto& dev_and_size : device_workspace) {
+      auto tgt = GetTargetFromInteger(dev_and_size.first);
+      fi_node->workspace_sizes.Set(tgt, dev_and_size.second);
+    }
+    // I/O, constant and relay-function entries are recorded against device type 0.
+    fi_node->io_sizes.Set(GetTargetFromInteger(0), input_size + output_size);
+    fi_node->constant_sizes.Set(GetTargetFromInteger(0), params_size);
+    fi_node->relay_primfuncs.Set(GetTargetFromInteger(0), func);
+
+    function_metadata_.Set(kMainFuncStr, FunctionInfo(fi_node));
+  }
+
   LoweredOutput Codegen(relay::Function func) {
     auto pf = GetPackedFunc("relay.backend.GraphPlanMemory");
     storage_device_map_ = (*pf)(func);
+    UpdateMainWorkspaceSize(func);
     // First we convert all the parameters into input nodes.
     for (auto param : func->params) {
       auto node_ptr = GraphInputNode::make_node_ptr(param->name_hint(), GraphAttrs());
@@ -219,6 +315,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
       ret.lowered_funcs.Set(kv.first, mod);
     }
     ret.external_mods = compile_engine_->LowerExternalFunctions();
+    ret.function_metadata = std::move(function_metadata_);
     return ret;
   }
 
@@ -349,6 +446,75 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
     return AddNode(node, GetRef<Expr>(op));
   }
 
+  /*!
+   * \brief Obtain the Target from the device type.
+   * If homogeneous compilation, this will return the only target.
+   * If heterogeneous compilation, this will select the associated target using the targets_ Map.
+   *
+   * \param dev_type The device type to look up
+   * \return Target The target registered for that device type
+   */
+  Target GetTargetFromInteger(int64_t dev_type) {
+    if (targets_.size() == 1) {
+      // homogeneous execution.
+      const auto& it = targets_.begin();
+      return (*it).second;
+    } else {
+      // heterogeneous execution.
+      std::string call_dev_name;
+      if (dev_type == 0) {
+        call_dev_name = "llvm";
+      } else {
+        call_dev_name = runtime::DeviceName(dev_type);
+      }
+      if (targets_.count(dev_type) == 0) {
+        LOG(FATAL) << "No target is provided for device " << call_dev_name;
+      }
+      return targets_[dev_type];
+    }
+  }
+
+  /*!
+   * \brief Update the function metadata for a cached function and its relay primitive function.
+   *
+   * \param cfunc The cached function as provided by the compile engine
+   * \param relay_func The source relay primitive function
+   * \param relay_target The target associated with relay primitive function
+   */
+  void UpdateFunctionMetadata(const CachedFunc& cfunc, const Function& relay_func,
+                              const Target& relay_target) {
+    auto fi_node = make_object<FunctionInfoNode>();
+    for (const auto& kv : cfunc->funcs->functions) {
+      auto primfunc = Downcast<tir::PrimFunc>(kv.second);
+      Integer workspace_size = CalculateWorkspaceBytes(primfunc);
+      Target primfunc_target = relay_target;
+      if (primfunc->attrs->dict.count("target")) {
+        primfunc_target = Downcast<Target>(primfunc->attrs->dict["target"]);
+      }
+      fi_node->workspace_sizes.Set(primfunc_target, workspace_size);
+      // Calculating size for I/O: accumulate over all parameters instead of keeping only the last
+      int io_size_bytes = 0;
+      for (auto const& param : primfunc->params) {
+        auto p_shape = primfunc->buffer_map[param]->shape;
+        int num_of_elements = 1;
+        for (const auto& dim_index_expr : p_shape) {
+          if (dim_index_expr->IsInstance<IntImmNode>()) {
+            num_of_elements *= dim_index_expr.as<IntImmNode>()->value;
+          } else {
+            // If shape is dynamic, we cannot calculate workspace in compile time.
+            num_of_elements = 0;
+          }
+        }
+        io_size_bytes += primfunc->buffer_map[param]->dtype.bytes() * num_of_elements;
+      }
+      fi_node->io_sizes.Set(primfunc_target, io_size_bytes);
+      fi_node->constant_sizes.Set(primfunc_target, 0);
+      fi_node->tir_primfuncs.Set(primfunc_target, primfunc);
+      fi_node->relay_primfuncs.Set(primfunc_target, relay_func);
+    }
+    function_metadata_.Set(cfunc->func_name, FunctionInfo(fi_node));
+  }
+
   std::vector<GraphNodeRef> VisitExpr_(const CallNode* op) override {
     Expr expr = GetRef<Expr>(op);
     Function func;
@@ -383,30 +549,19 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
     ICHECK_GE(storage_device_map_.count(expr), 0);
     auto& device_type = storage_device_map_[expr][1];
     auto call_dev_type = device_type[0]->value;
+    target = GetTargetFromInteger(call_dev_type);
     // Normal Relay Function
-    if (targets_.size() == 1) {
-      // homogeneous execution.
-      const auto& it = targets_.begin();
-      target = (*it).second;
-    } else {
-      // heterogeneous execution.
-      std::string call_dev_name;
-      if (call_dev_type == 0) {
-        call_dev_name = "llvm";
-      } else {
-        call_dev_name = runtime::DeviceName(call_dev_type);
-      }
-      if (targets_.count(call_dev_type) == 0) {
-        LOG(FATAL) << "No target is provided for device " << call_dev_name;
-      }
-      target = targets_[call_dev_type];
-    }
+
     CCacheKey key = (*pf0)(func, target);
     CachedFunc lowered_func = (*pf1)(compile_engine_, key);
     if (!lowered_funcs_.count(target->str())) {
       lowered_funcs_[target->str()] = IRModule(Map<GlobalVar, BaseFunc>({}));
     }
     lowered_funcs_[target->str()]->Update(lowered_func->funcs);
+
+    // Update function metadata via looking at all primfuncs
+    UpdateFunctionMetadata(lowered_func, func, target);
+
     return GraphAddCallNode(op, _GetUniqueName(lowered_func->func_name), lowered_func->func_name);
   }
 
@@ -551,10 +706,14 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
   Map<Expr, Array<IntegerArray>> storage_device_map_;
   /*! \brief lowered funcs */
   std::unordered_map<std::string, IRModule> lowered_funcs_;
+  /*! \brief function metadata: function name -> FunctionInfo */
+  Map<String, FunctionInfo> function_metadata_;
   /*! \brief name map */
   std::unordered_map<std::string, size_t> name_map_;
   /*! \brief compile engine */
   CompileEngine compile_engine_;
+  /*! \brief main function name */
+  const String kMainFuncStr = "main_func";
 };
 
 class GraphExecutorCodegenModule : public runtime::ModuleNode {
@@ -614,6 +773,10 @@ class GraphExecutorCodegenModule : public runtime::ModuleNode {
       return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
         *rv = this->output_.external_mods;
       });
+    } else if (name == "get_function_metadata") {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        *rv = this->output_.function_metadata;
+      });
     } else {
       return PackedFunc([](TVMArgs args, TVMRetValue* rv) {});
     }
diff --git a/src/relay/backend/utils.cc b/src/relay/backend/utils.cc
new file mode 100644
index 0000000000000..ba865d9d0a5b6
--- /dev/null
+++ b/src/relay/backend/utils.cc
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/utils.cc
+ * \brief Relay backend utilities.
+ */
+
+#include "utils.h"
+
+namespace tvm {
+namespace relay {
+namespace backend {
+
+void FunctionInfo::SetWorkspaceSize(Target tgt, tvm::Integer size) {  // workspace_sizes[tgt] = size
+  (*this)->workspace_sizes.Set(tgt, size);
+}
+
+TVM_REGISTER_NODE_TYPE(FunctionInfoNode);
+TVM_REGISTER_GLOBAL("relay.backend.FunctionInfo").set_body_typed([]() { return FunctionInfo(); });
+TVM_REGISTER_GLOBAL("relay.backend._FunctionInfo_SetWorkspaceSize")
+    .set_body_typed([](FunctionInfo fi, Target target, Integer size) {
+      return fi.SetWorkspaceSize(target, size);
+    });
+
+}  // namespace backend
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
index 6908ca85f5827..ccb1269184080 100644
--- a/src/relay/backend/utils.h
+++ b/src/relay/backend/utils.h
@@ -43,6 +43,31 @@ namespace tvm {
 namespace relay {
 namespace backend {
 
+struct FunctionInfoNode : public Object {  // per-function metadata collected during codegen
+  Map<Target, Integer> workspace_sizes;       // workspace bytes, keyed by target
+  Map<Target, Integer> io_sizes;              // input/output bytes, keyed by target
+  Map<Target, Integer> constant_sizes;        // constant bytes, keyed by target
+  Map<Target, tir::PrimFunc> tir_primfuncs;   // lowered TIR PrimFunc, keyed by target
+  Map<Target, Function> relay_primfuncs;      // source relay function, keyed by target
+
+  void VisitAttrs(tvm::AttrVisitor* v) {  // exposes every field under its own name
+    v->Visit("workspace_sizes", &workspace_sizes);
+    v->Visit("io_sizes", &io_sizes);
+    v->Visit("constant_sizes", &constant_sizes);
+    v->Visit("tir_primfuncs", &tir_primfuncs);
+    v->Visit("relay_primfuncs", &relay_primfuncs);
+  }
+
+  static constexpr const char* _type_key = "relay.backend.FunctionInfo";
+  TVM_DECLARE_FINAL_OBJECT_INFO(FunctionInfoNode, Object);
+};
+
+class FunctionInfo : public ObjectRef {  // mutable reference to a FunctionInfoNode
+ public:
+  void SetWorkspaceSize(Target tgt, Integer size);  // records `size` as the workspace of `tgt`
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(FunctionInfo, ObjectRef, FunctionInfoNode);
+};
+
 /*!
  * \brief A helper to expand the params by adding the ones used in a given expression.
  */
diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py
index db6c55bca12ae..0a1cb346203db 100644
--- a/tests/python/unittest/test_micro_model_library_format.py
+++ b/tests/python/unittest/test_micro_model_library_format.py
@@ -78,19 +78,35 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
 
         with open(os.path.join(extract_dir, "metadata.json")) as json_f:
             metadata = json.load(json_f)
-            assert metadata["version"] == 1
+            assert metadata["version"] == 2
             assert metadata["model_name"] == "add"
             export_datetime = datetime.datetime.strptime(
                 metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
             )
             assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
             assert metadata["target"] == {"1": str(target)}
-            assert metadata["memory"] == [
+            assert metadata["memory"]["sids"] == [
                 {"storage_id": 0, "size_bytes": 2, "input_binding": "a"},
                 {"storage_id": 1, "size_bytes": 8, "input_binding": "b"},
                 {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"},
                 {"storage_id": 3, "size_bytes": 8},
             ]
+            assert metadata["memory"]["functions"] == {
+                "main_function": [
+                    {
+                        "constants_size_bytes": 8,
+                        "device": 1,
+                        "io_size_bytes": 18,
+                        "workspace_size_bytes": 0,
+                    }
+                ],
+                "operator_functions": [
+                    {
+                        "function_name": "fused_cast_multiply_add",
+                        "workspace": [{"device": 1, "workspace_size_bytes": 0}],
+                    }
+                ],
+            }
 
         assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib0.c"))
         assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib1.c"))
@@ -141,19 +157,35 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
 
         with open(os.path.join(extract_dir, "metadata.json")) as json_f:
             metadata = json.load(json_f)
-            assert metadata["version"] == 1
+            assert metadata["version"] == 2
             assert metadata["model_name"] == "add"
             export_datetime = datetime.datetime.strptime(
                 metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
             )
             assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
             assert metadata["target"] == {"1": str(target)}
-            assert metadata["memory"] == [
+            assert metadata["memory"]["sids"] == [
                 {"storage_id": 0, "size_bytes": 2, "input_binding": "a"},
                 {"storage_id": 1, "size_bytes": 8, "input_binding": "b"},
                 {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"},
                 {"storage_id": 3, "size_bytes": 8},
             ]
+            assert metadata["memory"]["functions"] == {
+                "main_function": [
+                    {
+                        "constants_size_bytes": 8,
+                        "device": 1,
+                        "io_size_bytes": 18,
+                        "workspace_size_bytes": 0,
+                    }
+                ],
+                "operator_functions": [
+                    {
+                        "function_name": "fused_cast_multiply_add_1",
+                        "workspace": [{"device": 1, "workspace_size_bytes": 0}],
+                    }
+                ],
+            }
 
         assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "lib", "lib0.o"))
 
@@ -167,11 +199,68 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
             assert "p0" in params
 
 
+@tvm.testing.requires_micro
+def test_export_model_library_format_workspace():
+    with utils.TempDirectory.set_keep_for_debug(True):
+        target = tvm.target.target.micro("host")
+        with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
+            relay_mod = tvm.parser.fromtext(
+                """
+                #[version = "0.0.5"]
+                def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int16], %p2: Tensor[(1, 1, 1, 128), int32]){
+                  %0 = nn.conv2d(%p0, %p1, padding=[1, 1, 1, 1], groups=128, channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWOI", out_dtype="int32") /* ty=Tensor[(1, 56, 56, 128), int32] */;
+                  %1 = add(%0, %p2) /* ty=Tensor[(1, 56, 56, 128), int32] */;
+                  %2 = fixed_point_multiply(%1, multiplier=2080045879, shift=-4) /* ty=Tensor[(1, 56, 56, 128), int32] */;
+                  %3 = clip(%2, a_min=0f, a_max=255f) /* ty=Tensor[(1, 56, 56, 128), int32] */;
+                  cast(%3, dtype="uint8") /* ty=Tensor[(1, 56, 56, 128), uint8] */
+                }
+                """
+            )
+            factory = tvm.relay.build(relay_mod, target, target_host=target, mod_name="qnn_conv2d")
+
+        temp_dir = utils.tempdir()
+        mlf_tar_path = temp_dir.relpath("lib.tar")
+        import tvm.micro as micro
+
+        micro.export_model_library_format(factory, mlf_tar_path)
+
+        extract_dir = temp_dir.relpath("extract")
+        os.mkdir(extract_dir)
+        with tarfile.open(mlf_tar_path) as tf:  # close the archive once it is unpacked
+            tf.extractall(extract_dir)
+
+        with open(os.path.join(extract_dir, "metadata.json")) as json_f:
+            metadata = json.load(json_f)
+            assert metadata["version"] == 2
+            assert metadata["model_name"] == "qnn_conv2d"
+            export_datetime = datetime.datetime.strptime(
+                metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
+            )
+            assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
+            assert metadata["target"] == {"1": str(target)}
+            assert metadata["memory"]["functions"] == {
+                "main_function": [
+                    {
+                        "constants_size_bytes": 0,
+                        "device": 1,
+                        "io_size_bytes": 1207040,
+                        "workspace_size_bytes": 2466816,
+                    }
+                ],
+                "operator_functions": [
+                    {
+                        "function_name": "fused_nn_conv2d_add_fixed_point_multiply_clip_cast",
+                        "workspace": [{"device": 1, "workspace_size_bytes": 2466816}],
+                    }
+                ],
+            }
+
+
 @tvm.testing.requires_micro
 def test_export_model():
     module = tvm.support.FrontendTestModule()
     factory = graph_executor_factory.GraphExecutorFactoryModule(
-        None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}
+        None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}, {}
     )
 
     temp_dir = utils.tempdir()