From 5ef8b02c8d92cbf65c721d23651862360a01df41 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 13 Dec 2021 19:55:00 +0100 Subject: [PATCH 001/112] Add minimal working structure for generic interface --- cmake/config.cmake | 3 - cmake/modules/contrib/Generic.cmake | 22 ++++ .../relay/backend/contrib/generic/__init__.py | 18 +++ .../relay/backend/contrib/generic/codegen.py | 55 +++++++++ .../tvm/relay/backend/contrib/generic/run.py | 50 ++++++++ src/relay/backend/contrib/generic/codegen.cc | 112 ++++++++++++++++++ 6 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 cmake/modules/contrib/Generic.cmake create mode 100644 python/tvm/relay/backend/contrib/generic/__init__.py create mode 100644 python/tvm/relay/backend/contrib/generic/codegen.py create mode 100644 python/tvm/relay/backend/contrib/generic/run.py create mode 100644 src/relay/backend/contrib/generic/codegen.cc diff --git a/cmake/config.cmake b/cmake/config.cmake index 4cd10f104a83..973c0853c713 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -296,9 +296,6 @@ set(USE_VTA_FPGA OFF) # Whether use Thrust set(USE_THRUST OFF) -# Whether use cuRAND -set(USE_CURAND OFF) - # Whether to build the TensorFlow TVMDSOOp module set(USE_TF_TVMDSOOP OFF) diff --git a/cmake/modules/contrib/Generic.cmake b/cmake/modules/contrib/Generic.cmake new file mode 100644 index 000000000000..e03087b1cbc2 --- /dev/null +++ b/cmake/modules/contrib/Generic.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(USE_GENERIC) + file(GLOB COMPILER_GENERIC_SRCS + CONFIGURE_DEPENDS src/relay/backend/contrib/generic/*) + list(APPEND COMPILER_SRCS ${COMPILER_GENERIC_SRCS}) +endif(USE_GENERIC) diff --git a/python/tvm/relay/backend/contrib/generic/__init__.py b/python/tvm/relay/backend/contrib/generic/__init__.py new file mode 100644 index 000000000000..1e1dd56c1321 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Generic Scale4Edge NPUs codegen modules for Relay.""" +from . import codegen diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py new file mode 100644 index 000000000000..2e9cdc5e9c9f --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Codegen for Scale4Edge NPUs""" + +import tvm +from tvm import relay, te, tir + + +@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func") +def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc: + """ + This is the hook for python-based lowering of relay function + that gets offloaded to the target NPU. + + Parameters + ---------- + ext_func : relay.Function + This is the partitioned relay function + + Returns + ------- + primfunc : tir.PrimFunc + This returns the scheduled PrimFunc + """ + f = tvm._ffi.get_global_func("relay.backend.LowerToTE") + te_func = f(ext_func) + primfunc = te.create_prim_func_from_outputs(te_func.outputs) + primfunc = primfunc.with_attr("global_symbol", ext_func.attrs["global_symbol"]) + + mod = tvm.IRModule() + mod["main"] = primfunc + mod = tir.transform.StorageFlatten(64, False)(mod) + mod = tir.transform.LowerInitBlock()(mod) + mod = tir.transform.PlanAndUpdateBufferAllocationLocation()(mod) + mod = tir.transform.ConvertBlocksToOpaque()(mod) + mod = tir.transform.CompactBufferAllocation()(mod) + mod = tir.transform.LowerMatchBuffer()(mod) + mod = tir.transform.FlattenBuffer()(mod) + mod = tir.transform.Simplify()(mod) + + return mod["main"] diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py new file mode 100644 index 000000000000..dc9262b4c63a --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -0,0 +1,50 @@ +import tvm +from tvm import relay +from tvm.relay.backend.contrib import generic + +import torch + + +class TorchModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv1d( + 16, 24, 9, bias=False, padding=0, stride=1, dilation=1, groups=1 + ) + + def forward(self, x): + x = self.conv(x) + return x + + +def _register_external_op_helper(op_name, supported=True): + @tvm.ir.register_op_attr(op_name, "target.generic") + def _func_wrapper(expr): + return supported + + return _func_wrapper + + +_register_external_op_helper("nn.conv1d") + + +def main(): + torch_mod = TorchModel() + + # Pytorch frontend + input_shape = (1, 16, 20) + dummy_input = torch.randn(input_shape) + scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() + mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) + + mod = relay.transform.AnnotateTarget("generic")(mod) + mod = 
relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + print(mod) + + lib = relay.build(mod, tvm.target.Target("c")) + print(lib) + + +if __name__ == "__main__": + main() diff --git a/src/relay/backend/contrib/generic/codegen.cc b/src/relay/backend/contrib/generic/codegen.cc new file mode 100644 index 000000000000..1b4504bb7201 --- /dev/null +++ b/src/relay/backend/contrib/generic/codegen.cc @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/contrib/generic/codegen.cc + * + * \brief This file contains the target hooks for generic Scale4Edge Codegen. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace generic { + +/*! + * \brief This mutator lowers each external + * relay function to a TIR PrimFunc + * + * TODO: Just a slightly modified copy of Ethos-U. Needs refactoring for generic use-case. 
+ */ +class RelayToTIRMutator : public MixedModeMutator { + public: + explicit RelayToTIRMutator(IRModule ir_module) : ir_module_(ir_module) {} + + IRModule operator()() { + GlobalVar main_global_var = ir_module_->GetGlobalVar("main"); + Function main_func = Downcast(ir_module_->Lookup(main_global_var)); + + // Copy everything across and mutate the body + Function mutated_main = + Function(main_func->params, VisitExpr(main_func->body), main_func->ret_type, + main_func->type_params, main_func->attrs, main_func->span); + + ir_module_->Update(main_global_var, mutated_main); + ir_module_ = WithAttr(ir_module_, "device_contexts", device_contexts_); + return ir_module_; + } + + Expr Rewrite_(const CallNode* pre, const Expr& post) override { + Call call = Downcast(post); + if (call->op->IsInstance()) { + Function func = Downcast(call->op); + auto codegen_name = func->GetAttr(attr::kCompiler); + if (codegen_name.defined() && codegen_name == "generic") { + auto relay_to_tir_func_pf = + tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func"); + ICHECK(relay_to_tir_func_pf); + tir::PrimFunc prim_func = (*relay_to_tir_func_pf)(func); + prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target("c")); + String symbol_name = prim_func->GetAttr(tvm::attr::kGlobalSymbol).value(); + GlobalVar gv(symbol_name); + Array args = call->args; + gv->checked_type_ = func->checked_type(); + ir_module_->Update(gv, prim_func); + device_contexts_.Set(gv, codegen_name.value()); + return Call(gv, args, call->attrs, call->type_args); + } + } + return post; + } + + private: + IRModule ir_module_; + Map device_contexts_; +}; + +tvm::transform::Pass RelayToTIR() { + runtime::TypedPackedFunc pass_func = + [=](IRModule ir_module, transform::PassContext pass_context) { + return RelayToTIRMutator(ir_module)(); + }; + return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.generic.RelayToTIR", {}); +} + +TVM_REGISTER_TARGET_KIND("generic", kDLCPU) + .set_attr("use_device_api", Bool(true)) + .set_attr("RelayToTIR", RelayToTIR()); + +} // namespace generic +} // namespace contrib +} // namespace relay +} // namespace tvm From c22f3e5e459b6d9655dbeff5acdf45e6a7f91dde Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 15 Dec 2021 11:46:46 +0100 Subject: [PATCH 002/112] Separate target definition from codegen --- .../tvm/relay/backend/contrib/generic/run.py | 5 ++- src/relay/backend/contrib/generic/codegen.cc | 24 +++++----- src/relay/backend/contrib/generic/targets.cc | 45 +++++++++++++++++++ 3 files changed, 58 insertions(+), 16 deletions(-) create mode 100644 src/relay/backend/contrib/generic/targets.cc diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index dc9262b4c63a..78127e2e9147 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -17,8 +17,9 @@ def forward(self, x): return x +custom_target_name = "ultra_trail" def _register_external_op_helper(op_name, supported=True): - @tvm.ir.register_op_attr(op_name, "target.generic") + @tvm.ir.register_op_attr(op_name, f"target.{custom_target_name}") def _func_wrapper(expr): return supported @@ -37,7 +38,7 @@ def main(): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - mod = relay.transform.AnnotateTarget("generic")(mod) + mod = relay.transform.AnnotateTarget(custom_target_name)(mod) mod = 
relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.PartitionGraph()(mod) print(mod) diff --git a/src/relay/backend/contrib/generic/codegen.cc b/src/relay/backend/contrib/generic/codegen.cc index 1b4504bb7201..d1667b6aa2b8 100644 --- a/src/relay/backend/contrib/generic/codegen.cc +++ b/src/relay/backend/contrib/generic/codegen.cc @@ -20,7 +20,7 @@ /*! * \file relay/backend/contrib/generic/codegen.cc * - * \brief This file contains the target hooks for generic Scale4Edge Codegen. + * \brief this file contains the target hooks for generic scale4edge codegen. */ #include @@ -50,7 +50,9 @@ namespace generic { */ class RelayToTIRMutator : public MixedModeMutator { public: - explicit RelayToTIRMutator(IRModule ir_module) : ir_module_(ir_module) {} + explicit RelayToTIRMutator(IRModule ir_module, String target_name) + : ir_module_(ir_module), + target_name_(target_name) {} IRModule operator()() { GlobalVar main_global_var = ir_module_->GetGlobalVar("main"); @@ -62,7 +64,7 @@ class RelayToTIRMutator : public MixedModeMutator { main_func->type_params, main_func->attrs, main_func->span); ir_module_->Update(main_global_var, mutated_main); - ir_module_ = WithAttr(ir_module_, "device_contexts", device_contexts_); + return ir_module_; } @@ -71,7 +73,7 @@ class RelayToTIRMutator : public MixedModeMutator { if (call->op->IsInstance()) { Function func = Downcast(call->op); auto codegen_name = func->GetAttr(attr::kCompiler); - if (codegen_name.defined() && codegen_name == "generic") { + if (codegen_name.defined() && codegen_name == target_name_) { auto relay_to_tir_func_pf = tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func"); ICHECK(relay_to_tir_func_pf); @@ -79,11 +81,9 @@ class RelayToTIRMutator : public MixedModeMutator { prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target("c")); String symbol_name = prim_func->GetAttr(tvm::attr::kGlobalSymbol).value(); GlobalVar gv(symbol_name); - Array args = call->args; gv->checked_type_ = func->checked_type(); ir_module_->Update(gv, prim_func); - device_contexts_.Set(gv, codegen_name.value()); - return Call(gv, args, call->attrs, call->type_args); + return Call(gv, call->args, call->attrs, call->type_args); } } return post; @@ -91,21 +91,17 @@ class RelayToTIRMutator : public MixedModeMutator { private: IRModule ir_module_; - Map device_contexts_; + String target_name_; }; -tvm::transform::Pass RelayToTIR() { +tvm::transform::Pass RelayToTIR(String target_name) { runtime::TypedPackedFunc pass_func = [=](IRModule ir_module, transform::PassContext pass_context) { - return RelayToTIRMutator(ir_module)(); + return RelayToTIRMutator(ir_module, target_name)(); }; return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.generic.RelayToTIR", {}); } -TVM_REGISTER_TARGET_KIND("generic", kDLCPU) - .set_attr("use_device_api", Bool(true)) - .set_attr("RelayToTIR", RelayToTIR()); - } // namespace generic } // namespace contrib } // namespace relay diff --git a/src/relay/backend/contrib/generic/targets.cc b/src/relay/backend/contrib/generic/targets.cc new file mode 100644 index 000000000000..dcd80a46162b --- /dev/null +++ b/src/relay/backend/contrib/generic/targets.cc @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/contrib/generic/codegen.cc + * + * \brief this file contains the targets for generic scale4edge codegen. + */ + +#include +#include + +namespace tvm { + +namespace relay { +namespace contrib { +namespace generic { + tvm::transform::Pass RelayToTIR(String target_name); +} // namespace generic +} // namespace contrib +} // namespace relay + +TVM_REGISTER_TARGET_KIND("ultra_trail", kDLCPU) + .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("ultra_trail")); + +TVM_REGISTER_TARGET_KIND("rb_npu", kDLCPU) + .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("rb_npu")); + +} // namespace tvm From f63e28a500e2f836783fcf332cb2e976b6fa59b7 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 10 Jan 2022 11:59:26 +0100 Subject: [PATCH 003/112] Update file structure to support multiple NPU targets --- .../relay/backend/contrib/generic/__init__.py | 4 +- .../relay/backend/contrib/generic/codegen.py | 87 +++++++++++-------- .../backend/contrib/generic/rb_npu/codegen.py | 45 ++++++++++ .../contrib/generic/ultra_trail/codegen.py | 45 ++++++++++ .../contrib/generic/ultra_trail/passes.py | 0 .../contrib/generic/ultra_trail/schedules.py | 0 src/relay/backend/contrib/generic/codegen.cc | 2 +- 7 files changed, 147 insertions(+), 36 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py diff --git a/python/tvm/relay/backend/contrib/generic/__init__.py b/python/tvm/relay/backend/contrib/generic/__init__.py index 1e1dd56c1321..0f99d47aa88a 100644 --- a/python/tvm/relay/backend/contrib/generic/__init__.py +++ b/python/tvm/relay/backend/contrib/generic/__init__.py @@ -14,5 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Generic Scale4Edge NPUs codegen modules for Relay.""" +"""Generic NPU codegen modules.""" from . import codegen +from ultra_trail import codegen +from rb_npu import codegen diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 2e9cdc5e9c9f..617d5c996f63 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -14,42 +14,61 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""Codegen for Scale4Edge NPUs""" +"""Generic codegen for NPUs""" import tvm from tvm import relay, te, tir +class GenericCodegen(object): + def _lower_relay_to_tir(self, relay_prim_func : relay.Function) -> tvm.tir.PrimFunc: + """Lower a Relay primitive function to a S-TIR primitive function. -@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func") -def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc: - """ - This is the hook for python-based lowering of relay function - that gets offloaded to the target NPU. - - Parameters - ---------- - ext_func : relay.Function - This is the partitioned relay function - - Returns - ------- - primfunc : tir.PrimFunc - This returns the scheduled PrimFunc - """ - f = tvm._ffi.get_global_func("relay.backend.LowerToTE") - te_func = f(ext_func) - primfunc = te.create_prim_func_from_outputs(te_func.outputs) - primfunc = primfunc.with_attr("global_symbol", ext_func.attrs["global_symbol"]) - - mod = tvm.IRModule() - mod["main"] = primfunc - mod = tir.transform.StorageFlatten(64, False)(mod) - mod = tir.transform.LowerInitBlock()(mod) - mod = tir.transform.PlanAndUpdateBufferAllocationLocation()(mod) - mod = tir.transform.ConvertBlocksToOpaque()(mod) - mod = tir.transform.CompactBufferAllocation()(mod) - mod = tir.transform.LowerMatchBuffer()(mod) - mod = tir.transform.FlattenBuffer()(mod) - mod = tir.transform.Simplify()(mod) - - return mod["main"] + Parameters + ---------- + prim_func : tvm.relay.Function + The Relay function to lower. + + Returns + ------- + out : tvm.tir.PrimFunc + The lowered schedulable TensorIR primitive function. + + """ + f = tvm._ffi.get_global_func("relay.backend.LowerToTE") + te_cached_func = f(relay_prim_func) + tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) + tir_prim_func = tir_prim_func.with_attr("global_symbol", relay_prim_func.attrs["global_symbol"]) + return tir_prim_func + + def _lower_stir_to_nstir(self, prim_func : tvm.tir.PrimFunc) -> tvm.tir.PrimFunc: + mod = tvm.IRModule() + mod["main"] = prim_func + mod = tir.transform.StorageFlatten(64, False)(mod) + mod = tir.transform.LowerInitBlock()(mod) + mod = tir.transform.PlanAndUpdateBufferAllocationLocation()(mod) + mod = tir.transform.ConvertBlocksToOpaque()(mod) + mod = tir.transform.CompactBufferAllocation()(mod) + mod = tir.transform.LowerMatchBuffer()(mod) + mod = tir.transform.FlattenBuffer()(mod) + mod = tir.transform.Simplify()(mod) + prim_func = mod["main"] + return prim_func + + def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: + """ + This is the hook for python-based lowering of relay function + that gets offloaded to the target NPU. + + Parameters + ---------- + ext_func : relay.Function + The partitioned relay function. + + Returns + ------- + prim_func : tir.PrimFunc + The scheduled PrimFunc. + """ + prim_func = self._lower_relay_to_tir(ext_func) + prim_func = self._lower_stir_to_nstir(prim_func) + return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py new file mode 100644 index 000000000000..cc96ce408a53 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Codegen for the RP_NPU""" + +import tvm +from tvm import relay +from tvm.relay.backend.contrib.generic.codegen import GenericCodegen + +class RBNPUCodegen(GenericCodegen): + def __init__(self): + super(RBNPUCodegen, self).__init__() + +@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_rb_npu") +def relay_to_tir_func_rb_npu(ext_func: relay.Function) -> tvm.tir.PrimFunc: + """ + This is the hook for python-based lowering of relay function + that gets offloaded to the RB NPU. + + Parameters + ---------- + ext_func : relay.Function + This is the partitioned relay function + + Returns + ------- + prim_func : tir.PrimFunc + This returns the scheduled PrimFunc + """ + codegen = RBNPUCodegen() + prim_func = codegen.relay_to_tir_func(ext_func) + return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py new file mode 100644 index 000000000000..f5c5ce78a14c --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Codegen for the UltraTrail accelerator""" + +import tvm +from tvm import relay +from tvm.relay.backend.contrib.generic.codegen import GenericCodegen + +class UltraTrailCodegen(GenericCodegen): + def __init__(self): + super(UltraTrailCodegen, self).__init__() + +@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") +def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: + """ + This is the hook for python-based lowering of relay function + that gets offloaded to the UltraTrail accelerator. 
+ + Parameters + ---------- + ext_func : relay.Function + This is the partitioned relay function + + Returns + ------- + prim_func : tir.PrimFunc + This returns the scheduled PrimFunc + """ + codegen = UltraTrailCodegen() + prim_func = codegen.relay_to_tir_func(ext_func) + return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/relay/backend/contrib/generic/codegen.cc b/src/relay/backend/contrib/generic/codegen.cc index d1667b6aa2b8..f192d5a1cc5d 100644 --- a/src/relay/backend/contrib/generic/codegen.cc +++ b/src/relay/backend/contrib/generic/codegen.cc @@ -75,7 +75,7 @@ class RelayToTIRMutator : public MixedModeMutator { auto codegen_name = func->GetAttr(attr::kCompiler); if (codegen_name.defined() && codegen_name == target_name_) { auto relay_to_tir_func_pf = - tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func"); + tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func_" + target_name_); ICHECK(relay_to_tir_func_pf); tir::PrimFunc prim_func = (*relay_to_tir_func_pf)(func); prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target("c")); From adefb484c67140344b97ee4c567534a7861a42ff Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Tue, 11 Jan 2022 17:28:28 +0100 Subject: [PATCH 004/112] Add scheduling and pass support to codegen --- .../relay/backend/contrib/generic/__init__.py | 4 +- .../relay/backend/contrib/generic/codegen.py | 30 ++++++++++++--- .../backend/contrib/generic/rb_npu/codegen.py | 5 +++ .../tvm/relay/backend/contrib/generic/run.py | 22 ++--------- .../contrib/generic/ultra_trail/codegen.py | 8 ++++ .../contrib/generic/ultra_trail/pattern.py | 38 +++++++++++++++++++ 6 files changed, 80 insertions(+), 27 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py diff --git a/python/tvm/relay/backend/contrib/generic/__init__.py b/python/tvm/relay/backend/contrib/generic/__init__.py index 0f99d47aa88a..c22fe1606fd4 100644 --- a/python/tvm/relay/backend/contrib/generic/__init__.py +++ b/python/tvm/relay/backend/contrib/generic/__init__.py @@ -16,5 +16,5 @@ # under the License. """Generic NPU codegen modules.""" from . import codegen -from ultra_trail import codegen -from rb_npu import codegen +from .ultra_trail import codegen +from .rb_npu import codegen diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 617d5c996f63..86cb52d7964a 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -16,11 +16,14 @@ # under the License. """Generic codegen for NPUs""" +from abc import abstractmethod +from typing import Dict, List import tvm from tvm import relay, te, tir + class GenericCodegen(object): - def _lower_relay_to_tir(self, relay_prim_func : relay.Function) -> tvm.tir.PrimFunc: + def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. 
Parameters @@ -37,12 +40,14 @@ def _lower_relay_to_tir(self, relay_prim_func : relay.Function) -> tvm.tir.PrimF f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) - tir_prim_func = tir_prim_func.with_attr("global_symbol", relay_prim_func.attrs["global_symbol"]) + tir_prim_func = tir_prim_func.with_attr( + "global_symbol", relay_prim_func.attrs["global_symbol"] + ) return tir_prim_func - def _lower_stir_to_nstir(self, prim_func : tvm.tir.PrimFunc) -> tvm.tir.PrimFunc: - mod = tvm.IRModule() - mod["main"] = prim_func + def _lower_stir_to_nstir(self, schedule: tvm.tir.Schedule) -> tvm.tir.PrimFunc: + mod = schedule.mod + mod = self.apply_passes_before(mod) mod = tir.transform.StorageFlatten(64, False)(mod) mod = tir.transform.LowerInitBlock()(mod) mod = tir.transform.PlanAndUpdateBufferAllocationLocation()(mod) @@ -51,9 +56,20 @@ def _lower_stir_to_nstir(self, prim_func : tvm.tir.PrimFunc) -> tvm.tir.PrimFunc mod = tir.transform.LowerMatchBuffer()(mod) mod = tir.transform.FlattenBuffer()(mod) mod = tir.transform.Simplify()(mod) + mod = self.apply_passes_after(mod) prim_func = mod["main"] return prim_func + @abstractmethod + def apply_schedules(self, schedule: tvm.tir.Schedule) -> tvm.tir.Schedule: + pass + + def apply_passes_before(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: + return mod + + def apply_passes_after(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: + return mod + def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: """ This is the hook for python-based lowering of relay function @@ -70,5 +86,7 @@ def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: The scheduled PrimFunc. """ prim_func = self._lower_relay_to_tir(ext_func) - prim_func = self._lower_stir_to_nstir(prim_func) + schedule = tir.Schedule(prim_func) + schedule = self.apply_schedules(schedule) + prim_func = self._lower_stir_to_nstir(schedule) return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py index cc96ce408a53..edaceb00898d 100644 --- a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py @@ -20,10 +20,15 @@ from tvm import relay from tvm.relay.backend.contrib.generic.codegen import GenericCodegen + class RBNPUCodegen(GenericCodegen): def __init__(self): super(RBNPUCodegen, self).__init__() + def apply_schedules(self, schedule): + return schedule + + @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_rb_npu") def relay_to_tir_func_rb_npu(ext_func: relay.Function) -> tvm.tir.PrimFunc: """ diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index 78127e2e9147..a4a40095860e 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -1,6 +1,7 @@ import tvm +import tvm.relay.backend.contrib.generic from tvm import relay -from tvm.relay.backend.contrib import generic +from tvm.relay.backend.contrib.generic.ultra_trail.pattern import match_ultra_trail import torch @@ -16,19 +17,6 @@ def forward(self, x): x = self.conv(x) return x - -custom_target_name = "ultra_trail" -def _register_external_op_helper(op_name, supported=True): - @tvm.ir.register_op_attr(op_name, f"target.{custom_target_name}") - def _func_wrapper(expr): - return supported - - return _func_wrapper - - 
-_register_external_op_helper("nn.conv1d") - - def main(): torch_mod = TorchModel() @@ -38,11 +26,7 @@ def main(): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - mod = relay.transform.AnnotateTarget(custom_target_name)(mod) - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.PartitionGraph()(mod) - print(mod) - + mod = match_ultra_trail(mod) lib = relay.build(mod, tvm.target.Target("c")) print(lib) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index f5c5ce78a14c..0752b96466dc 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -24,6 +24,14 @@ class UltraTrailCodegen(GenericCodegen): def __init__(self): super(UltraTrailCodegen, self).__init__() + # 1. Tensorize + # 2. Generate Config + def apply_schedules(self, schedule): + n, k, c, f_x, x = schedule.get_loops(schedule.get_block("conv1d_ncw")) + k_0, k_1 = schedule.split(k, factors=[2, None]) + schedule.reorder(n, k_0, c, f_x, x, k_1) + return schedule + @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: """ diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py new file mode 100644 index 000000000000..ccfe5a31c07c --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Relay matching pattern for the UltraTrail accelerator""" + +import tvm +from tvm import relay + +custom_target_name = "ultra_trail" + + +def _register_external_op_helper(op_name, supported=True): + @tvm.ir.register_op_attr(op_name, f"target.{custom_target_name}") + def _func_wrapper(expr): + return supported + + return _func_wrapper + + +def match_ultra_trail(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: + _register_external_op_helper("nn.conv1d") + mod = relay.transform.AnnotateTarget(custom_target_name)(mod) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + return mod From 47c55784c67fd2e7dea8fec7ac6c78bbe8baa2bc Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 13 Jan 2022 13:42:08 +0100 Subject: [PATCH 005/112] Update schedule function and pass registration --- .../relay/backend/contrib/generic/__init__.py | 1 - .../relay/backend/contrib/generic/codegen.py | 108 ++++++++++++++---- .../backend/contrib/generic/rb_npu/codegen.py | 7 +- .../tvm/relay/backend/contrib/generic/run.py | 5 +- .../contrib/generic/ultra_trail/codegen.py | 13 ++- .../{pattern.py => partitioner.py} | 4 +- .../contrib/generic/ultra_trail/schedules.py | 23 ++++ 7 files changed, 126 insertions(+), 35 deletions(-) rename python/tvm/relay/backend/contrib/generic/ultra_trail/{pattern.py => partitioner.py} (91%) diff --git a/python/tvm/relay/backend/contrib/generic/__init__.py b/python/tvm/relay/backend/contrib/generic/__init__.py index c22fe1606fd4..0f51b92b11cd 100644 --- a/python/tvm/relay/backend/contrib/generic/__init__.py +++ b/python/tvm/relay/backend/contrib/generic/__init__.py @@ -15,6 +15,5 @@ # specific language governing permissions and limitations # under the License. """Generic NPU codegen modules.""" -from . import codegen from .ultra_trail import codegen from .rb_npu import codegen diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 86cb52d7964a..891cd10592cc 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -16,13 +16,77 @@ # under the License. """Generic codegen for NPUs""" -from abc import abstractmethod -from typing import Dict, List import tvm from tvm import relay, te, tir +from abc import abstractmethod +from typing import List, Tuple, Callable + class GenericCodegen(object): + def __init__(self) -> None: + self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] + self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] + + self._register_tir_schedules() + self._register_tir_passes() + + @abstractmethod + def _register_tir_schedules(self) -> None: + """Register a set of TIR scheduling functions which are applied to the schedule. + + Example + ------- + Here is an example of how two scheduling functions can be registered. + + .. code-block:: python + + def _register_tir_schedules(self): + self._register_tir_schedule(schedule_func_0) + self._register_tir_schedule(schedule_func_1) + + Use `pass` if no scheduling function should be registerd. + + .. code-block:: python + + def _register_tir_schedules(self): + pass + + """ + pass + + @abstractmethod + def _register_tir_passes(self) -> None: + """Register a set of TIR passes which are applied during lowering. + + Example + ------- + Here is an example of how two passes can be registered. + + .. 
code-block:: python + + def _register_tir_passes(self): + self._register_tir_pass(pass_0) + self._register_tir_pass(pass_1) + + Use `pass` if no TIR pass should be registerd. + + .. code-block:: python + + def _register_tir_passes(self): + pass + + """ + pass + + def _register_tir_schedule( + self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] + ) -> None: + self._tir_schedules.append(sch_func) + + def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: + self._tir_passes.append((stage, tir_pass)) + def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. @@ -46,29 +110,25 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu return tir_prim_func def _lower_stir_to_nstir(self, schedule: tvm.tir.Schedule) -> tvm.tir.PrimFunc: - mod = schedule.mod - mod = self.apply_passes_before(mod) - mod = tir.transform.StorageFlatten(64, False)(mod) - mod = tir.transform.LowerInitBlock()(mod) - mod = tir.transform.PlanAndUpdateBufferAllocationLocation()(mod) - mod = tir.transform.ConvertBlocksToOpaque()(mod) - mod = tir.transform.CompactBufferAllocation()(mod) - mod = tir.transform.LowerMatchBuffer()(mod) - mod = tir.transform.FlattenBuffer()(mod) - mod = tir.transform.Simplify()(mod) - mod = self.apply_passes_after(mod) - prim_func = mod["main"] - return prim_func + """Lower a S-TIR schedule to a NS-TIR primitive function. - @abstractmethod - def apply_schedules(self, schedule: tvm.tir.Schedule) -> tvm.tir.Schedule: - pass + Parameters + ---------- + schedule : tvm.tir.Schedule + The schedule to lower. - def apply_passes_before(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: - return mod + Returns + ------- + out : tvm.tir.PrimFunc + The lowered non-schedulable TensorIR primitive function. - def apply_passes_after(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: - return mod + """ + with tvm.transform.PassContext( + config={"tir.add_lower_pass": self._tir_passes}, opt_level=0 + ): + mod = tvm.lower(schedule.mod) + prim_func = mod["main"] + return prim_func def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: """ @@ -84,9 +144,11 @@ def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: ------- prim_func : tir.PrimFunc The scheduled PrimFunc. 
+ """ prim_func = self._lower_relay_to_tir(ext_func) schedule = tir.Schedule(prim_func) - schedule = self.apply_schedules(schedule) + for sch_func in self._tir_schedules: + schedule = sch_func(schedule) prim_func = self._lower_stir_to_nstir(schedule) return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py index edaceb00898d..9326c792ffad 100644 --- a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py @@ -25,8 +25,11 @@ class RBNPUCodegen(GenericCodegen): def __init__(self): super(RBNPUCodegen, self).__init__() - def apply_schedules(self, schedule): - return schedule + def _register_tir_schedules(self): + pass + + def _register_tir_passes(self): + pass @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_rb_npu") diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index a4a40095860e..7ba991271e0e 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -1,7 +1,7 @@ import tvm import tvm.relay.backend.contrib.generic +from tvm.relay.backend.contrib.generic.ultra_trail.partitioner import partition_for_ultra_trail from tvm import relay -from tvm.relay.backend.contrib.generic.ultra_trail.pattern import match_ultra_trail import torch @@ -17,6 +17,7 @@ def forward(self, x): x = self.conv(x) return x + def main(): torch_mod = TorchModel() @@ -26,7 +27,7 @@ def main(): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - mod = match_ultra_trail(mod) + mod = partition_for_ultra_trail(mod) lib = relay.build(mod, tvm.target.Target("c")) print(lib) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index 0752b96466dc..f031b903e642 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -19,6 +19,8 @@ import tvm from tvm import relay from tvm.relay.backend.contrib.generic.codegen import GenericCodegen +from tvm.relay.backend.contrib.generic.ultra_trail.schedules import example_sch_func + class UltraTrailCodegen(GenericCodegen): def __init__(self): @@ -26,11 +28,12 @@ def __init__(self): # 1. Tensorize # 2. 
Generate Config - def apply_schedules(self, schedule): - n, k, c, f_x, x = schedule.get_loops(schedule.get_block("conv1d_ncw")) - k_0, k_1 = schedule.split(k, factors=[2, None]) - schedule.reorder(n, k_0, c, f_x, x, k_1) - return schedule + def _register_tir_schedules(self): + self._register_tir_schedule(example_sch_func) + + def _register_tir_passes(self): + self._register_tir_pass(0, tvm.tir.transform.Simplify()) + @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py similarity index 91% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py rename to python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py index ccfe5a31c07c..885e6c6706a4 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/pattern.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Relay matching pattern for the UltraTrail accelerator""" +"""Relay partitioner for the UltraTrail accelerator""" import tvm from tvm import relay @@ -30,7 +30,7 @@ def _func_wrapper(expr): return _func_wrapper -def match_ultra_trail(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: +def partition_for_ultra_trail(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: _register_external_op_helper("nn.conv1d") mod = relay.transform.AnnotateTarget(custom_target_name)(mod) mod = relay.transform.MergeCompilerRegions()(mod) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py index e69de29bb2d1..3861d53ed94b 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""TIR schedule functions for the UltraTrail accelerator""" + +def example_sch_func(sch): + n, k, c, f_x, x = sch.get_loops(sch.get_block("conv1d_ncw")) + k_0, k_1 = sch.split(k, factors=[2, None]) + sch.reorder(n, k_0, c, f_x, x, k_1) + return sch From a34e015d691863d7c42cf13f4028d21864049b36 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 13 Jan 2022 15:41:23 +0100 Subject: [PATCH 006/112] Add generic partitioner for relay graph partitioning --- .../backend/contrib/generic/partitioner.py | 78 +++++++++++++++++++ .../tvm/relay/backend/contrib/generic/run.py | 5 +- .../generic/ultra_trail/partitioner.py | 24 ++---- 3 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/generic/partitioner.py diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/generic/partitioner.py new file mode 100644 index 000000000000..61ebbbb3da21 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/partitioner.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Generic relay partitioner for NPUs""" + +import tvm +from tvm import relay +from typing import Callable +from abc import abstractmethod + + +class GenericPartitioner(object): + @property + @abstractmethod + def target_name(self) -> str: + """Name of the hardware target. + + Returns + ------- + out : str + The hardware target name. + """ + + @abstractmethod + def _register_supported_ops(self) -> None: + """Register a set of supported relay operations which are applied to the schedule. + + Example + ------- + Here is an example of how two supported operations can be registered. + + .. code-block:: python + + def _register_supported_ops(self): + self._register_supported_op(op_0) + self._register_supported_op(op_1) + """ + pass + + def _register_supported_op(self, op: str) -> Callable: + @tvm.ir.register_op_attr(op, "target.{}".format(self.target_name)) + def _func_wrapper(_): + return True + + return _func_wrapper + + def __call__(self, mod: tvm.IRModule) -> tvm.IRModule: + """Partition the relay graph in by the NPU supported and unsupported parts. + + Parameters + ---------- + mod : tvm.IRModule + The relay module to be partitioned. + + Returns + ------- + out : tvm.IRModule + The partitioned relay module. 
+ + """ + self._register_supported_ops() + mod = relay.transform.AnnotateTarget(self.target_name)(mod) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + return mod diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index 7ba991271e0e..e544b580ed3e 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -1,11 +1,10 @@ import tvm import tvm.relay.backend.contrib.generic -from tvm.relay.backend.contrib.generic.ultra_trail.partitioner import partition_for_ultra_trail +from tvm.relay.backend.contrib.generic.ultra_trail.partitioner import UltraTrailPartitioner from tvm import relay import torch - class TorchModel(torch.nn.Module): def __init__(self): super().__init__() @@ -27,7 +26,7 @@ def main(): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - mod = partition_for_ultra_trail(mod) + mod = UltraTrailPartitioner()(mod) lib = relay.build(mod, tvm.target.Target("c")) print(lib) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py index 885e6c6706a4..8c1b703d3b04 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py @@ -16,23 +16,13 @@ # under the License. """Relay partitioner for the UltraTrail accelerator""" -import tvm -from tvm import relay +from tvm.relay.backend.contrib.generic.partitioner import GenericPartitioner -custom_target_name = "ultra_trail" +class UltraTrailPartitioner(GenericPartitioner): + @property + def target_name(self): + return "ultra_trail" -def _register_external_op_helper(op_name, supported=True): - @tvm.ir.register_op_attr(op_name, f"target.{custom_target_name}") - def _func_wrapper(expr): - return supported - - return _func_wrapper - - -def partition_for_ultra_trail(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: - _register_external_op_helper("nn.conv1d") - mod = relay.transform.AnnotateTarget(custom_target_name)(mod) - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.PartitionGraph()(mod) - return mod + def register_supportet_ops(self): + self._register_supported_op("nn.conv1d") From 5889fee6bc54b104cbefa07648705ba66d7d9868 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 17 Jan 2022 21:42:06 +0100 Subject: [PATCH 007/112] Add pattern-based relay graph partitioning and AOT codegen --- cmake/config.cmake | 4 +-- .../backend/contrib/generic/partitioner.py | 34 +++++-------------- .../tvm/relay/backend/contrib/generic/run.py | 30 ++++++++++++++-- .../contrib/generic/ultra_trail/codegen.py | 4 +-- .../generic/ultra_trail/partitioner.py | 6 ++-- .../contrib/generic/ultra_trail/patterns.py | 31 +++++++++++++++++ 6 files changed, 74 insertions(+), 35 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py diff --git a/cmake/config.cmake b/cmake/config.cmake index 973c0853c713..9c927ce05c0d 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -96,7 +96,7 @@ set(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT OFF) set(USE_OPENGL OFF) # Whether enable MicroTVM runtime -set(USE_MICRO OFF) +set(USE_MICRO ON) # Whether enable RPC runtime set(USE_RPC ON) @@ -133,7 +133,7 @@ set(USE_MICRO_STANDALONE_RUNTIME OFF) # - OFF: disable llvm, note this 
will disable CPU codegen # which is needed for most cases # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. -set(USE_LLVM OFF) +set(USE_LLVM ON) #--------------------------------------------- # Contrib libraries diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/generic/partitioner.py index 61ebbbb3da21..49e2ae4887e9 100644 --- a/python/tvm/relay/backend/contrib/generic/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/partitioner.py @@ -18,7 +18,6 @@ import tvm from tvm import relay -from typing import Callable from abc import abstractmethod @@ -34,29 +33,6 @@ def target_name(self) -> str: The hardware target name. """ - @abstractmethod - def _register_supported_ops(self) -> None: - """Register a set of supported relay operations which are applied to the schedule. - - Example - ------- - Here is an example of how two supported operations can be registered. - - .. code-block:: python - - def _register_supported_ops(self): - self._register_supported_op(op_0) - self._register_supported_op(op_1) - """ - pass - - def _register_supported_op(self, op: str) -> Callable: - @tvm.ir.register_op_attr(op, "target.{}".format(self.target_name)) - def _func_wrapper(_): - return True - - return _func_wrapper - def __call__(self, mod: tvm.IRModule) -> tvm.IRModule: """Partition the relay graph in by the NPU supported and unsupported parts. @@ -71,8 +47,16 @@ def __call__(self, mod: tvm.IRModule) -> tvm.IRModule: The partitioned relay module. """ - self._register_supported_ops() + pattern = relay.op.contrib.get_pattern_table(self.target_name) + mod = relay.transform.InferType()(mod) + mod = relay.transform.MergeComposite(pattern)(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) + mod = relay.transform.InferType()(mod) + # Defunctionalize the partitioned functions to allow lowering + for gv, func in mod.functions.items(): + mod.update_func(gv, relay.transform.Defunctionalization(func, mod)) + return mod diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index e544b580ed3e..5f93e7ffbc7f 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -4,6 +4,9 @@ from tvm import relay import torch +import tarfile +from pathlib import Path + class TorchModel(torch.nn.Module): def __init__(self): @@ -11,9 +14,12 @@ def __init__(self): self.conv = torch.nn.Conv1d( 16, 24, 9, bias=False, padding=0, stride=1, dilation=1, groups=1 ) + self.relu = torch.nn.ReLU() def forward(self, x): x = self.conv(x) + x = self.relu(x) + x = x + 42 return x @@ -26,9 +32,29 @@ def main(): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) + # Relay target specific partitioning mod = UltraTrailPartitioner()(mod) - lib = relay.build(mod, tvm.target.Target("c")) - print(lib) + + # Relay build (AOT C target) + TARGET = tvm.target.Target("c") + RUNTIME = tvm.relay.backend.Runtime("crt") + EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + + with tvm.transform.PassContext( + opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] + ): + module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) + + 
model_library_format_tar_path = Path("build/lib.tar") + model_library_format_tar_path.unlink(missing_ok=True) + model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) + + tvm.micro.export_model_library_format(module, model_library_format_tar_path) + + print("Built MLF Library: ") + with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: + print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) + tar_f.extractall(model_library_format_tar_path.parent) if __name__ == "__main__": diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index f031b903e642..a86c31e19309 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -18,8 +18,8 @@ import tvm from tvm import relay -from tvm.relay.backend.contrib.generic.codegen import GenericCodegen -from tvm.relay.backend.contrib.generic.ultra_trail.schedules import example_sch_func +from ..codegen import GenericCodegen +from .schedules import example_sch_func class UltraTrailCodegen(GenericCodegen): diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py index 8c1b703d3b04..9e82c3697375 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py @@ -16,13 +16,11 @@ # under the License. """Relay partitioner for the UltraTrail accelerator""" -from tvm.relay.backend.contrib.generic.partitioner import GenericPartitioner - +from .patterns import pattern_table +from ..partitioner import GenericPartitioner class UltraTrailPartitioner(GenericPartitioner): @property def target_name(self): return "ultra_trail" - def register_supportet_ops(self): - self._register_supported_op("nn.conv1d") diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py new file mode 100644 index 000000000000..b4e08c794994 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Relay graph patterns for the UltraTrail accelerator""" + +from tvm.relay.op.contrib.register import register_pattern_table +from tvm.relay.dataflow_pattern import is_op, wildcard + + +def example_pattern(): + pattern = is_op("nn.conv1d")(wildcard(), wildcard()) + pattern = is_op("nn.relu")(pattern) + return pattern + + +@register_pattern_table("ultra_trail") +def pattern_table(): + return [("ultra_trail.conv1d_relu", example_pattern())] From be8048bd0415be5c6205a261dfeaaadedf18d8e6 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 19 Jan 2022 13:52:53 +0100 Subject: [PATCH 008/112] Update API --- .../relay/backend/contrib/generic/codegen.py | 7 +- .../backend/contrib/generic/partitioner.py | 83 ++++++++++++++++++- .../tvm/relay/backend/contrib/generic/run.py | 10 ++- .../contrib/generic/ultra_trail/codegen.py | 11 +-- .../generic/ultra_trail/partitioner.py | 9 +- .../contrib/generic/ultra_trail/passes.py | 54 ++++++++++++ .../contrib/generic/ultra_trail/patterns.py | 6 -- .../contrib/generic/ultra_trail/schedules.py | 20 ++++- .../relay/backend/contrib/generic/utils.py | 72 ++++++++++++++++ 9 files changed, 250 insertions(+), 22 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/generic/utils.py diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 891cd10592cc..8eaafcd912bf 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -22,6 +22,8 @@ from abc import abstractmethod from typing import List, Tuple, Callable +from .utils import extract_constants + class GenericCodegen(object): def __init__(self) -> None: @@ -53,7 +55,6 @@ def _register_tir_schedules(self): pass """ - pass @abstractmethod def _register_tir_passes(self) -> None: @@ -77,7 +78,6 @@ def _register_tir_passes(self): pass """ - pass def _register_tir_schedule( self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] @@ -101,12 +101,15 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. """ + relay_prim_func, constants = extract_constants(relay_prim_func) f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) + tir_prim_func = tir_prim_func.with_attr("constants", constants) + tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func def _lower_stir_to_nstir(self, schedule: tvm.tir.Schedule) -> tvm.tir.PrimFunc: diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/generic/partitioner.py index 49e2ae4887e9..1c4a3908e95f 100644 --- a/python/tvm/relay/backend/contrib/generic/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/partitioner.py @@ -16,12 +16,26 @@ # under the License. 
"""Generic relay partitioner for NPUs""" +from tvm.relay.build_module import bind_params_by_name +from typing import Dict, List, Tuple, Optional import tvm from tvm import relay from abc import abstractmethod +from tvm.relay.op.contrib.register import register_pattern_table + class GenericPartitioner(object): + def __init__(self, variant: str = "") -> None: + self._variant = variant + + self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] + self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, List[str]]] = [] + + self._register_relay_passes() + self._register_patterns() + register_pattern_table(self.target_name, self._pattern_table) + @property @abstractmethod def target_name(self) -> str: @@ -33,7 +47,63 @@ def target_name(self) -> str: The hardware target name. """ - def __call__(self, mod: tvm.IRModule) -> tvm.IRModule: + @abstractmethod + def _register_relay_passes(self) -> None: + """Register a set of relay passes which are applied during lowering. + + Example + ------- + Here is an example of how two passes can be registered. + + .. code-block:: python + + def _register_relay_passes(self): + self._register_relay_pass(pass_0) + self._register_relay_pass(pass_1) + + Use `pass` if no relay pass should be registerd. + + .. code-block:: python + + def _register_relay_passes(self): + pass + + """ + + @abstractmethod + def _register_patterns(self) -> None: + """Register a set of relay graph patterns which used for partitioning. + + Example + ------- + Here is an example of how two patterns can be registered. + + .. code-block:: python + + def _register_patterns(self): + self._register_pattern(pattern_0) + self._register_pattern(pattern_1) + """ + + def _register_relay_pass(self, stage: int, relay_pass: tvm.transform.Pass) -> None: + self._relay_passes.append((stage, relay_pass)) + + def _register_pattern( + self, + name: str, + pattern: tvm.relay.dataflow_pattern.DFPattern, + variants: Optional[List[str]] = None, + ): + self._patterns.append((name, pattern, [] if variants is None else variants)) + + def _pattern_table(self): + return [ + (self.target_name + "." + pattern[0], pattern[1]) + for pattern in self._patterns + if self._variant in pattern[2] or not pattern[2] + ] + + def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]]) -> tvm.IRModule: """Partition the relay graph in by the NPU supported and unsupported parts. Parameters @@ -47,16 +117,25 @@ def __call__(self, mod: tvm.IRModule) -> tvm.IRModule: The partitioned relay module. 
""" + if params: + mod["main"] = bind_params_by_name(mod["main"], params) + pattern = relay.op.contrib.get_pattern_table(self.target_name) mod = relay.transform.InferType()(mod) + mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 0])(mod) mod = relay.transform.MergeComposite(pattern)(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) mod = relay.transform.InferType()(mod) + mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 1])(mod) + mod = relay.transform.InferType()(mod) # Defunctionalize the partitioned functions to allow lowering for gv, func in mod.functions.items(): - mod.update_func(gv, relay.transform.Defunctionalization(func, mod)) + mod.update_func( + gv, relay.transform.Defunctionalization(func, mod) + ) + mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 2])(mod) return mod diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index 5f93e7ffbc7f..5c7fd0238402 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -12,13 +12,19 @@ class TorchModel(torch.nn.Module): def __init__(self): super().__init__() self.conv = torch.nn.Conv1d( - 16, 24, 9, bias=False, padding=0, stride=1, dilation=1, groups=1 + 16, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 ) self.relu = torch.nn.ReLU() + self.conv2 = torch.nn.Conv1d( + 24, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 + ) + self.relu2 = torch.nn.ReLU() def forward(self, x): x = self.conv(x) x = self.relu(x) + x = self.conv2(x) + x = self.relu2(x) x = x + 42 return x @@ -33,7 +39,7 @@ def main(): mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) # Relay target specific partitioning - mod = UltraTrailPartitioner()(mod) + mod = UltraTrailPartitioner()(mod, params) # Relay build (AOT C target) TARGET = tvm.target.Target("c") diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index a86c31e19309..fb4155838e02 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -19,20 +19,21 @@ import tvm from tvm import relay from ..codegen import GenericCodegen -from .schedules import example_sch_func +from .schedules import * +from .passes import * class UltraTrailCodegen(GenericCodegen): def __init__(self): super(UltraTrailCodegen, self).__init__() - # 1. Tensorize - # 2. 
Generate Config def _register_tir_schedules(self): - self._register_tir_schedule(example_sch_func) + self._register_tir_schedule(insert_extern_calls) def _register_tir_passes(self): - self._register_tir_pass(0, tvm.tir.transform.Simplify()) + self._register_tir_pass(0, CodegenGenerateConfig()) + self._register_tir_pass(0, CodegenGenerateConstants()) + pass @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py index 9e82c3697375..345ed9e53a4e 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py @@ -16,11 +16,18 @@ # under the License. """Relay partitioner for the UltraTrail accelerator""" -from .patterns import pattern_table +from .passes import * +from .patterns import * from ..partitioner import GenericPartitioner + class UltraTrailPartitioner(GenericPartitioner): @property def target_name(self): return "ultra_trail" + def _register_patterns(self): + self._register_pattern("conv1d_relu", example_pattern()) + + def _register_relay_passes(self): + self._register_relay_pass(2, ExtractConfig()) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py index e69de29bb2d1..a8a9d964d125 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Transform passes for the UltraTrail accelerator""" + +import tvm + +# TODO +@tvm.ir.transform.module_pass(opt_level=1) +class ExtractConfig: + """This pass """ + + def transform_module( + self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.ir.IRModule: + return mod + + def __call__(self, *args, **kwargs): + pass + + +@tvm.tir.transform.prim_func_pass(opt_level=1) +class CodegenGenerateConfig: + def transform_function( + self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + return func + + def __call__(self, *args, **kwargs): + pass + + +@tvm.tir.transform.prim_func_pass(opt_level=1) +class CodegenGenerateConstants: + def transform_function( + self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + return func + + def __call__(self, *args, **kwargs): + pass diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py index b4e08c794994..2eb5c75f9f3e 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py @@ -16,7 +16,6 @@ # under the License. """Relay graph patterns for the UltraTrail accelerator""" -from tvm.relay.op.contrib.register import register_pattern_table from tvm.relay.dataflow_pattern import is_op, wildcard @@ -24,8 +23,3 @@ def example_pattern(): pattern = is_op("nn.conv1d")(wildcard(), wildcard()) pattern = is_op("nn.relu")(pattern) return pattern - - -@register_pattern_table("ultra_trail") -def pattern_table(): - return [("ultra_trail.conv1d_relu", example_pattern())] diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py index 3861d53ed94b..5c5446606e14 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py @@ -16,8 +16,20 @@ # under the License. """TIR schedule functions for the UltraTrail accelerator""" -def example_sch_func(sch): - n, k, c, f_x, x = sch.get_loops(sch.get_block("conv1d_ncw")) - k_0, k_1 = sch.split(k, factors=[2, None]) - sch.reorder(n, k_0, c, f_x, x, k_1) +# TODO +def insert_extern_calls(sch): + return sch + +def schedule_supported_ops(sch): + block_rvs = sch.get_child_blocks(sch.get_block("root")) + blocks = [sch.get_sref(block_rv).stmt for block_rv in block_rvs] + + sch.compute_inline(sch.get_block("pad_temp")) + n, k, x, c, f = sch.get_loops(sch.get_block("conv1d_ncw")) + sch.reorder(n, k, c, f, x) + # sch.reverse_compute_at(sch.get_block("T_relu"), sch.get_loops(sch.get_block("conv1d_ncw"))[1]) + # k_o, k_i = sch.split(k, factors=[None, 8]) + # c_o, c_i = sch.split(c, factors=[None, 8]) + + breakpoint() return sch diff --git a/python/tvm/relay/backend/contrib/generic/utils.py b/python/tvm/relay/backend/contrib/generic/utils.py new file mode 100644 index 000000000000..3d27e3caaa12 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/utils.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Utility methods""" + +import tvm +from tvm import relay +from tvm.relay.expr_functor import ExprMutator + +class ExtractConstants(ExprMutator): + """The actual mutator pass to extract the constants from a function and replace them with + Vars so the function can be lowered to a TE graph. Additionally returns all the values of + the constants extracted.""" + + def __init__(self): + super().__init__() + self.constants = {} + self.const_vars = [] + + def visit_constant(self, const): + if isinstance(const.checked_type, relay.ty.TensorType): + name = "p" + str(len(self.constants)) + self.constants[name] = const.data + var = relay.var(type_annotation=const.checked_type, name_hint=name) + self.const_vars.append(var) + return var + + return const + + def visit_function(self, fn): + new_body = self.visit(fn.body) + new_params = list(fn.params) + self.const_vars + return relay.Function(new_params, new_body, attrs=fn.attrs) + + def extract_constants(self, func): + new_func = self.visit(func) + return new_func, self.constants + + +def extract_constants(func): + """Extract the constants from a function and replace them with + Vars so the function can be lowered to a TE graph. Additionally + returns all the values of the constants extracted. + Parameters + ---------- + func : tvm.relay.Function + The Relay function from which to extract constants. + Returns + ------- + new_func : tvm.relay.Function + The Relay function with constants replaced by vars. + const_dict : dict of int to numpy.ndarray + A dict of the extracted constants keyed by their param index. + """ + new_func, consts = ExtractConstants().extract_constants(func) + new_func = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(new_func))[ + func.attrs["global_symbol"] + ] + return new_func, consts From 340ddd85e14fad8a946bb5ae6cdfe8726836a127 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 2 Feb 2022 01:00:15 +0100 Subject: [PATCH 009/112] Add UltraTrail relay passes and schedule function --- .../relay/backend/contrib/generic/codegen.py | 4 +- .../backend/contrib/generic/partitioner.py | 4 +- .../tvm/relay/backend/contrib/generic/run.py | 2 +- .../contrib/generic/ultra_trail/codegen.py | 1 - .../generic/ultra_trail/partitioner.py | 5 +- .../contrib/generic/ultra_trail/passes.py | 143 ++++++++++++++++-- .../contrib/generic/ultra_trail/patterns.py | 6 +- .../contrib/generic/ultra_trail/schedules.py | 35 +++-- 8 files changed, 166 insertions(+), 34 deletions(-) diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 8eaafcd912bf..eddf8af3143c 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -101,14 +101,14 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. 
""" - relay_prim_func, constants = extract_constants(relay_prim_func) + # relay_prim_func, constants = extract_constants(relay_prim_func) f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - tir_prim_func = tir_prim_func.with_attr("constants", constants) + # tir_prim_func = tir_prim_func.with_attr("constants", constants) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/generic/partitioner.py index 1c4a3908e95f..2373347e8fca 100644 --- a/python/tvm/relay/backend/contrib/generic/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/partitioner.py @@ -117,8 +117,8 @@ def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDA The partitioned relay module. """ - if params: - mod["main"] = bind_params_by_name(mod["main"], params) + # if params: + # mod["main"] = bind_params_by_name(mod["main"], params) pattern = relay.op.contrib.get_pattern_table(self.target_name) mod = relay.transform.InferType()(mod) diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index 5c7fd0238402..b6993bf2d80d 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -12,7 +12,7 @@ class TorchModel(torch.nn.Module): def __init__(self): super().__init__() self.conv = torch.nn.Conv1d( - 16, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 + 16, 24, 9, bias=True, padding=4, stride=1, dilation=1, groups=1 ) self.relu = torch.nn.ReLU() self.conv2 = torch.nn.Conv1d( diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index fb4155838e02..3e4e79dd2606 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -33,7 +33,6 @@ def _register_tir_schedules(self): def _register_tir_passes(self): self._register_tir_pass(0, CodegenGenerateConfig()) self._register_tir_pass(0, CodegenGenerateConstants()) - pass @tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py index 345ed9e53a4e..64a7714b45bd 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py @@ -27,7 +27,8 @@ def target_name(self): return "ultra_trail" def _register_patterns(self): - self._register_pattern("conv1d_relu", example_pattern()) + self._register_pattern("conv1d_relu", conv1d_relu_pattern()) def _register_relay_passes(self): - self._register_relay_pass(2, ExtractConfig()) + self._register_relay_pass(1, ConfigGenerator()) + self._register_relay_pass(2, BufferScopeAnnotator()) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py index a8a9d964d125..39589c1b8627 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py @@ -17,19 +17,146 
@@ """Transform passes for the UltraTrail accelerator""" import tvm +from tvm import relay + + +class LayerConfigGenerator(relay.ExprVisitor): + def __init__(self, acc_spec, config): + super(LayerConfigGenerator, self).__init__() + self.acc_spec = acc_spec + self.config = config + + def _extract_config_from_call(self, call): + # Config extraction assumes that the pattern matching already performed all validity checks + if call.op.name == "nn.conv1d": + self.config["ch_in_len"] = int(call.type_args[0].shape[2]) + self.config["ch_in_blk"] = int(call.type_args[0].shape[1]) // self.acc_spec["array_dim"] + self.config["kernel_shape"] = int(call.attrs.kernel_size[0]) + self.config["stride"] = int(call.attrs.strides[0]) + self.config["pad"] = int(call.attrs.padding[0] > 0) + elif call.op.name == "nn.bias_add": + self.config["bias"] = 1 + elif call.op.name == "nn.relu": + self.config["relu"] = 1 + + def visit_call(self, call): + self._extract_config_from_call(call) + for a in call.args: + self.visit(a) + + +class SubgraphConfigGenerator(relay.ExprVisitor): + def __init__(self, acc_spec): + super(SubgraphConfigGenerator, self).__init__() + self.acc_spec = acc_spec + self.config = [] + + def _compute_dataflow_control(self): + # The graph-based execution order and corresponding configuration should be computed here. + # Currently only switches between two feature memories (no parallel pathes). + self.config.reverse() + + mem_ctrl = 0x0 + for c in self.config: + c["mem_ctrl"] = mem_ctrl + mem_ctrl = 0x2 if mem_ctrl == 0x0 else 0x0 + + self.config[-1]["last"] = 1 + + def visit_function(self, func): + layer_config = {k: 0 for k in self.acc_spec["conf_reg_layer_bits"].keys()} + layer_config["ch_out_len"] = int(func.ret_type.shape[2]) + layer_config["ch_out_blk"] = int(func.ret_type.shape[1]) // self.acc_spec["array_dim"] + LayerConfigGenerator(self.acc_spec, layer_config).visit(func.body) + self.config.append(layer_config) + + def generate_config(self, func): + self.visit(func.body) + self._compute_dataflow_control() + return self.config + -# TODO @tvm.ir.transform.module_pass(opt_level=1) -class ExtractConfig: - """This pass """ +class ConfigGenerator: + """This pass generates a configuration string for the UltraTrail accelerator + for each partitioned relay subgraph.""" + + def __init__(self): + # Note: This information should eventually be passed as an accelerator description. 
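+        # Accelerator parameters used below: the array dimension (channels are
+        # blocked in groups of this size), the number of layer configuration
+        # registers, and the bit width of each per-layer configuration field.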
+ self.acc_spec = { + "array_dim": 8, + "conf_reg_layers": 16, + "conf_reg_layer_bits": { + "mem_ctrl": 4, + "ch_in_len": 7, + "ch_in_blk": 4, + "ch_out_len": 7, + "ch_out_blk": 4, + "kernel_shape": 4, + "stride": 3, + "avg_pool_exp": 3, + "pad": 1, + "relu": 1, + "bias": 1, + "avg": 1, + "early_exit": 1, + "last": 1, + }, + } + + def _config_to_bitstring(self, config): + bitstring = config + return bitstring def transform_module( self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.ir.IRModule: + + # Create UltraTrail specific config based on matched subgraph + for gv, func in mod.functions.items(): + if func.attrs is not None and func.attrs["Compiler"] == "ultra_trail": + config = SubgraphConfigGenerator(self.acc_spec).generate_config(func) + config_bitstring = self._config_to_bitstring(config) + mod.update_func(gv, func.with_attr("ut_config", config_bitstring)) + return mod - def __call__(self, *args, **kwargs): - pass + +@tvm.ir.transform.module_pass(opt_level=1) +class BufferScopeAnnotator: + """This pass annotates the params of an offloaded function with the target UltraTrail memory.""" + + def transform_module( + self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.ir.IRModule: + class FunctionVisitor(relay.ExprVisitor): + def __init__(self): + super(FunctionVisitor, self).__init__() + self._var_scope_map = {} + + def visit_call(self, call): + if call.op.name == "nn.conv1d": + self._var_scope_map[call.args[1]] = "wmem" + elif call.op.name == "nn.bias_add": + self._var_scope_map[call.args[1]] = "bmem" + + for a in call.args: + self.visit(a) + + def var_scope_map(self, func): + self.visit(func.body) + return self._var_scope_map + + # Annotate buffer scopes + for gv, func in mod.functions.items(): + if func.attrs is not None and func.attrs["Compiler"] == "ultra_trail": + var_scope_map = FunctionVisitor().var_scope_map(func) + scopes = [ + var_scope_map[var] if var in var_scope_map else "fmem" for var in func.params + ] + mod.update_func(gv, func.with_attr("ut_buffer_scopes", scopes)) + + return mod @tvm.tir.transform.prim_func_pass(opt_level=1) @@ -39,9 +166,6 @@ def transform_function( ) -> tvm.tir.PrimFunc: return func - def __call__(self, *args, **kwargs): - pass - @tvm.tir.transform.prim_func_pass(opt_level=1) class CodegenGenerateConstants: @@ -49,6 +173,3 @@ def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: return func - - def __call__(self, *args, **kwargs): - pass diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py index 2eb5c75f9f3e..cc9852e9b611 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py @@ -19,7 +19,9 @@ from tvm.relay.dataflow_pattern import is_op, wildcard -def example_pattern(): +def conv1d_relu_pattern(): pattern = is_op("nn.conv1d")(wildcard(), wildcard()) - pattern = is_op("nn.relu")(pattern) + optional_bias = lambda x: is_op("nn.bias_add")(x, wildcard()) + optional_relu = lambda x: is_op("nn.relu")(x) + pattern = pattern.optional(optional_bias).optional(optional_relu) return pattern diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py index 5c5446606e14..f87db02c0684 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py +++ 
b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py @@ -16,20 +16,29 @@ # under the License. """TIR schedule functions for the UltraTrail accelerator""" -# TODO -def insert_extern_calls(sch): - return sch +from tvm.topi.utils import prod +from tvm import tir +from tvm.script import tir as T -def schedule_supported_ops(sch): - block_rvs = sch.get_child_blocks(sch.get_block("root")) - blocks = [sch.get_sref(block_rv).stmt for block_rv in block_rvs] +# create one load buffer extern_call for each buffer_var (input/weights) +# - dont reset counter, only for first +# - packed buffers, correct layout, take care of missalignment at the end (software?,hardware?) +# create one load buffer for config +def insert_extern_calls(sch): + def extern_calls(): + calls = [] + buffer_scopes = list(sch.mod["main"].attrs["relay_attrs"]["ut_buffer_scopes"]) + buffer_scopes.reverse() # for some reason TIR params are reversed to relay function + for i, buffer_scope in enumerate(buffer_scopes): + buffer = sch.mod["main"].buffer_map[sch.mod["main"].params[i]] + size = prod(buffer.shape) + var = buffer.data + call = tir.call_extern("int32", f"load_{buffer_scope}", var, size) + calls.append(tir.Evaluate(call)) + seq = tir.stmt_seq(*calls) + return tir.Block([], [], [], "call_extern", seq) - sch.compute_inline(sch.get_block("pad_temp")) - n, k, x, c, f = sch.get_loops(sch.get_block("conv1d_ncw")) - sch.reorder(n, k, c, f, x) - # sch.reverse_compute_at(sch.get_block("T_relu"), sch.get_loops(sch.get_block("conv1d_ncw"))[1]) - # k_o, k_i = sch.split(k, factors=[None, 8]) - # c_o, c_i = sch.split(c, factors=[None, 8]) + root_sref = sch.get_sref(sch.get_block("root")) + sch.state.replace(root_sref, extern_calls()) - breakpoint() return sch From 86e851a351207c2db176720abc5a96e8dfc19184 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 2 Feb 2022 18:42:46 +0100 Subject: [PATCH 010/112] Update UltraTrail relay passes --- .../contrib/generic/ultra_trail/passes.py | 41 +++++++++++-------- .../contrib/generic/ultra_trail/schedules.py | 1 - 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py index 39589c1b8627..95a4e26ebe76 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py @@ -19,6 +19,8 @@ import tvm from tvm import relay +from collections import OrderedDict + class LayerConfigGenerator(relay.ExprVisitor): def __init__(self, acc_spec, config): @@ -86,26 +88,31 @@ def __init__(self): self.acc_spec = { "array_dim": 8, "conf_reg_layers": 16, - "conf_reg_layer_bits": { - "mem_ctrl": 4, - "ch_in_len": 7, - "ch_in_blk": 4, - "ch_out_len": 7, - "ch_out_blk": 4, - "kernel_shape": 4, - "stride": 3, - "avg_pool_exp": 3, - "pad": 1, - "relu": 1, - "bias": 1, - "avg": 1, - "early_exit": 1, - "last": 1, - }, + "conf_reg_layer_bits": OrderedDict( + { + "mem_ctrl": 4, + "ch_in_len": 7, + "ch_in_blk": 4, + "ch_out_len": 7, + "ch_out_blk": 4, + "kernel_shape": 4, + "stride": 3, + "avg_pool_exp": 3, + "pad": 1, + "relu": 1, + "bias": 1, + "avg": 1, + "early_exit": 1, + "last": 1, + } + ), } def _config_to_bitstring(self, config): - bitstring = config + bitstring = "" + for layer in config: + for k, v in self.acc_spec["conf_reg_layer_bits"].items(): + bitstring += "{:0{}b}".format(layer[k], v) return bitstring def transform_module( diff --git 
a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py index f87db02c0684..4b2fd4ebd0a2 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py @@ -18,7 +18,6 @@ from tvm.topi.utils import prod from tvm import tir -from tvm.script import tir as T # create one load buffer extern_call for each buffer_var (input/weights) # - dont reset counter, only for first From 3303ae33c602a558ef4ca8dddf4755027109cf4c Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 2 Feb 2022 18:44:01 +0100 Subject: [PATCH 011/112] Add tir_to_runtime hook for UltraTrail --- cmake/modules/contrib/Generic.cmake | 3 +- src/relay/backend/contrib/generic/codegen.cc | 2 +- src/relay/backend/contrib/generic/targets.cc | 6 +- .../generic/ultra_trail/tir_to_runtime.cc | 74 +++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc diff --git a/cmake/modules/contrib/Generic.cmake b/cmake/modules/contrib/Generic.cmake index e03087b1cbc2..497556097ee4 100644 --- a/cmake/modules/contrib/Generic.cmake +++ b/cmake/modules/contrib/Generic.cmake @@ -17,6 +17,7 @@ if(USE_GENERIC) file(GLOB COMPILER_GENERIC_SRCS - CONFIGURE_DEPENDS src/relay/backend/contrib/generic/*) + CONFIGURE_DEPENDS src/relay/backend/contrib/generic/* + CONFIGURE_DEPENDS src/relay/backend/contrib/generic/ultra_trail/*) list(APPEND COMPILER_SRCS ${COMPILER_GENERIC_SRCS}) endif(USE_GENERIC) diff --git a/src/relay/backend/contrib/generic/codegen.cc b/src/relay/backend/contrib/generic/codegen.cc index f192d5a1cc5d..22995f9a3957 100644 --- a/src/relay/backend/contrib/generic/codegen.cc +++ b/src/relay/backend/contrib/generic/codegen.cc @@ -78,7 +78,7 @@ class RelayToTIRMutator : public MixedModeMutator { tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func_" + target_name_); ICHECK(relay_to_tir_func_pf); tir::PrimFunc prim_func = (*relay_to_tir_func_pf)(func); - prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target("c")); + prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target(target_name_)); String symbol_name = prim_func->GetAttr(tvm::attr::kGlobalSymbol).value(); GlobalVar gv(symbol_name); gv->checked_type_ = func->checked_type(); diff --git a/src/relay/backend/contrib/generic/targets.cc b/src/relay/backend/contrib/generic/targets.cc index dcd80a46162b..23fa215d60b7 100644 --- a/src/relay/backend/contrib/generic/targets.cc +++ b/src/relay/backend/contrib/generic/targets.cc @@ -32,12 +32,16 @@ namespace relay { namespace contrib { namespace generic { tvm::transform::Pass RelayToTIR(String target_name); +namespace ultra_trail { + runtime::Module TIRToRuntime(IRModule mod, Target target); +} // namespace ultra_trail } // namespace generic } // namespace contrib } // namespace relay TVM_REGISTER_TARGET_KIND("ultra_trail", kDLCPU) - .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("ultra_trail")); + .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("ultra_trail")) + .set_attr("TIRToRuntime", relay::contrib::generic::ultra_trail::TIRToRuntime); TVM_REGISTER_TARGET_KIND("rb_npu", kDLCPU) .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("rb_npu")); diff --git a/src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc b/src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc new file mode 100644 index 000000000000..df0520b7ca63 --- 
/dev/null +++ b/src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include +#include +#include +#include +#include +#include + +#include "../../../../../runtime/file_utils.h" +#include "../../../../../target/source/codegen_c.h" +#include "../../../../../target/source/codegen_c_host.h" + +namespace tvm { +using namespace tir; +namespace relay { +namespace contrib { +namespace generic { +namespace ultra_trail { + +class CodeGenUltraTrail : public codegen::CodeGenCHost { + public: + void Init(bool output_ssa, bool emit_asserts, std::string target_str) { + decl_stream << "#include \n"; + decl_stream << "#include \n"; + decl_stream << "#include \n"; + CodeGenCHost::Init(output_ssa, emit_asserts, target_str); + } + + /*! + * \brief Emit code that offloads a subgraph to the Cortex-M + * + * \return string of code that offloads a subgraph to the Cortex-M + */ + void AddFunction(const PrimFunc& prim_func) { CodeGenC::AddFunction(prim_func); } +}; + +runtime::Module TIRToRuntime(IRModule mod, Target target) { + bool output_ssa = false; + bool emit_asserts = false; + CodeGenUltraTrail codegen; + Array function_names; + codegen.Init(output_ssa, emit_asserts, target->str()); + for (auto kv : mod->functions) { + auto prim_func = Downcast(kv.second); + auto global_symbol = prim_func->GetAttr(tvm::attr::kGlobalSymbol); + function_names.push_back(global_symbol.value()); + codegen.AddFunction(prim_func); + } + std::string code = codegen.Finish(); + return codegen::CSourceModuleCreate(code, "c", function_names); +} + +} // namespace ultra_trail +} // namespace generic +} // namespace contrib +} // namespace relay +} // namespace tvm From d220c5fd04b31412f4d936f4cf0243d35ef352a5 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 4 Feb 2022 10:57:51 +0100 Subject: [PATCH 012/112] Add operator strategy registration to lowering --- .../relay/backend/contrib/generic/codegen.py | 35 +++++ .../contrib/generic/ultra_trail/codegen.py | 4 + .../contrib/generic/ultra_trail/strategies.py | 120 ++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index eddf8af3143c..7d2e6404dd29 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -18,6 +18,7 @@ import tvm from tvm import relay, te, tir +from tvm.relay.op import op as _op from abc import abstractmethod from typing import List, Tuple, Callable @@ -30,9 +31,33 @@ def __init__(self) -> None: self._tir_schedules: List[Callable[[tvm.tir.Schedule], 
tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] + self._register_operator_strategies() self._register_tir_schedules() self._register_tir_passes() + @abstractmethod + def _register_operator_strategies(self) -> None: + """Register a set of operator strategies which are considered during lowering from relay to TE. + + Example + ------- + Here is an example of how two operator strategies can be registered. + + .. code-block:: python + + def _register_operator_strategies(self): + self._register_operator_strategy(operator_strategy_0) + self._register_operator_strategy(operator_strategy_1) + + Use `pass` if no operator strategy should be registerd. + + .. code-block:: python + + def _register_operator_strategies(self): + pass + + """ + @abstractmethod def _register_tir_schedules(self) -> None: """Register a set of TIR scheduling functions which are applied to the schedule. @@ -79,6 +104,16 @@ def _register_tir_passes(self): """ + def _register_operator_strategy( + self, + op: str, + strat: Callable[ + [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], _op.OpStrategy + ], + plevel: int = 11, + ) -> None: + _op.register_strategy(op, strat, level=plevel) + def _register_tir_schedule( self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] ) -> None: diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py index 3e4e79dd2606..6e5404cf7bd9 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py @@ -19,6 +19,7 @@ import tvm from tvm import relay from ..codegen import GenericCodegen +from .strategies import * from .schedules import * from .passes import * @@ -27,6 +28,9 @@ class UltraTrailCodegen(GenericCodegen): def __init__(self): super(UltraTrailCodegen, self).__init__() + def _register_operator_strategies(self): + self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) + def _register_tir_schedules(self): self._register_tir_schedule(insert_extern_calls) diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py b/python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py new file mode 100644 index 000000000000..cd01a8c87132 --- /dev/null +++ b/python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py @@ -0,0 +1,120 @@ +from tvm import relay, te +from tvm.relay.op import op as _op +from tvm.topi.utils import get_const_tuple +from tvm.topi.nn.utils import get_pad_tuple1d +from tvm.relay.op.strategy.generic import wrap_compute_conv1d, wrap_topi_schedule + +import logging + +logger = logging.getLogger("strategy") + +def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): + """1D convolution forward operator for NCW layout. + + Parameters + ---------- + data : tvm.te.Tensor + 3-D with shape [batch, in_channel, in_width] + + kernel : tvm.te.Tensor + 3-D with shape [num_filter, in_channel, filter_size] + + strides : int or tuple + The spatial stride along width + + padding : int, tuple, or str + Padding size can be an integer for equal padding, + a tuple of (left, right) or a string in ['VALID', 'SAME']. + + dilation : int or tuple + Dilation rate if convolution should be dilated. + + out_dtype : str + The output data type. If None then output is same type as input. 
+ """ + s = strides + d = dilation + if out_dtype is None: + out_dtype = data.dtype + if isinstance(strides, (tuple, list)): + s = strides[0] + if isinstance(dilation, (tuple, list)): + d = dilation[0] + + batch, in_channels, data_width = data.shape + out_channels, _, kernel_size = kernel.shape + + # Compute padding and out width + pad_left, pad_right = get_pad_tuple1d(padding, (kernel_size,)) + if pad_left != pad_right: + raise ValueError("Padding has to be symmetric. Got %d %d" % pad_left, pad_right) + p = pad_left + out_width = (data_width + 2 * p - kernel_size - (kernel_size - 1) * (d - 1)) // s + 1 + + # Compute graph + rc = te.reduce_axis((0, in_channels), name="rc") + rx = te.reduce_axis((0, kernel_size), name="rx") + return te.compute( + (batch, out_channels, out_width), + lambda nn, kk, xx: te.sum( + te.if_then_else( + te.any(s * xx + d * rx - p < 0, s * xx + d * rx - p >= data_width), + 0.0, + data[nn, rc, s * xx + d * rx - p].astype(out_dtype) + * kernel[kk, rc, rx].astype(out_dtype), + ), + axis=[rc, rx], + ), + tag="custom_conv1d_ncw", + ) + + +# TVM integration: Add schedule to `python/tvm/topi/generic/nn.py` +def schedule_conv1d_ncw(outs): + """Schedule for conv1d_ncw + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of conv1d_ncw + in the format of an array of tensors. + + Returns + ------- + sch: Schedule + The computation schedule for the op. + """ + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + nn, kk, xx = s[outs[0]].op.axis + rc, rx = s[outs[0]].op.reduce_axis + + # kk_outer, kk_inner = s[outs[0]].split(kk, 8) + # xx_outer, xx_inner = s[outs[0]].split(xx, 1) + + # s[outs[0]].reorder(kk_outer, xx_outer, kk_inner, xx_inner) + # s[outs[0]].vectorize(xx) + # s[outs[0]].unroll(rc) + + return s + + +# TVM integration: Add strategy to `python/tvm/relay/op/strategy/generic.py` +@relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") +def custom_conv1d_strategy(attrs, inputs, out_type, target): + """custom conv1d generic strategy""" + logger.warning("custom conv1d is not optimized for this platform.") + layout = attrs.data_layout + dilation = get_const_tuple(attrs.dilation) + if dilation[0] < 1: + raise ValueError("dilation should be a positive value") + strategy = _op.OpStrategy() + if layout == "NCW": + strategy.add_implementation( + wrap_compute_conv1d(conv1d_ncw), + wrap_topi_schedule(schedule_conv1d_ncw), + name="custom_conv1d_ncw.generic", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) + return strategy From 1003b3f867820d9d7d48cf75a4a681908d92e717 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 4 Feb 2022 11:15:16 +0100 Subject: [PATCH 013/112] Add option to pass constants as attributes --- python/tvm/relay/backend/contrib/generic/codegen.py | 8 ++++---- python/tvm/relay/backend/contrib/generic/partitioner.py | 6 +++--- python/tvm/relay/backend/contrib/generic/run.py | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/generic/codegen.py index 7d2e6404dd29..6bd9c69481b5 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/generic/codegen.py @@ -21,7 +21,7 @@ from tvm.relay.op import op as _op from abc import abstractmethod -from typing import List, Tuple, Callable +from typing import List, Tuple, Callable, Optional from .utils import extract_constants 
@@ -110,7 +110,7 @@ def _register_operator_strategy( strat: Callable[ [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], _op.OpStrategy ], - plevel: int = 11, + plevel: Optional[int] = 11, ) -> None: _op.register_strategy(op, strat, level=plevel) @@ -136,14 +136,14 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. """ - # relay_prim_func, constants = extract_constants(relay_prim_func) + relay_prim_func, constants = extract_constants(relay_prim_func) f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - # tir_prim_func = tir_prim_func.with_attr("constants", constants) + tir_prim_func = tir_prim_func.with_attr("constants", constants) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/generic/partitioner.py index 2373347e8fca..70a6947f5d37 100644 --- a/python/tvm/relay/backend/contrib/generic/partitioner.py +++ b/python/tvm/relay/backend/contrib/generic/partitioner.py @@ -103,7 +103,7 @@ def _pattern_table(self): if self._variant in pattern[2] or not pattern[2] ] - def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]]) -> tvm.IRModule: + def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None) -> tvm.IRModule: """Partition the relay graph in by the NPU supported and unsupported parts. Parameters @@ -117,8 +117,8 @@ def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDA The partitioned relay module. 
""" - # if params: - # mod["main"] = bind_params_by_name(mod["main"], params) + if params: + mod["main"] = bind_params_by_name(mod["main"], params) pattern = relay.op.contrib.get_pattern_table(self.target_name) mod = relay.transform.InferType()(mod) diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/generic/run.py index b6993bf2d80d..ac21ea19a048 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/generic/run.py @@ -39,7 +39,8 @@ def main(): mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) # Relay target specific partitioning - mod = UltraTrailPartitioner()(mod, params) + # mod = UltraTrailPartitioner()(mod, params) + mod = UltraTrailPartitioner()(mod) # Relay build (AOT C target) TARGET = tvm.target.Target("c") From b9de8c1b704939748819e499c153c52f5df80151 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 4 Feb 2022 14:31:31 +0100 Subject: [PATCH 014/112] Refactor naming: Generic to UMA --- CMakeLists.txt | 1 + cmake/config.cmake | 3 +++ cmake/modules/contrib/{Generic.cmake => UMA.cmake} | 12 ++++++------ .../backend/contrib/{generic => uma}/__init__.py | 4 ++-- .../contrib/{generic/codegen.py => uma/lower.py} | 4 ++-- .../contrib/{generic => uma}/partitioner.py | 4 ++-- .../rb_npu/codegen.py => uma/rb_npu/lower.py} | 10 +++++----- .../relay/backend/contrib/{generic => uma}/run.py | 4 ++-- .../codegen.py => uma/ultra_trail/lower.py} | 12 ++++++------ .../{generic => uma}/ultra_trail/partitioner.py | 6 +++--- .../contrib/{generic => uma}/ultra_trail/passes.py | 0 .../{generic => uma}/ultra_trail/patterns.py | 0 .../{generic => uma}/ultra_trail/schedules.py | 0 .../{generic => uma}/ultra_trail/strategies.py | 0 .../backend/contrib/{generic => uma}/utils.py | 2 +- .../backend/contrib/{generic => uma}/codegen.cc | 14 ++++++-------- .../backend/contrib/{generic => uma}/targets.cc | 14 +++++++------- .../{generic => uma}/ultra_trail/tir_to_runtime.cc | 4 ++-- 18 files changed, 48 insertions(+), 46 deletions(-) rename cmake/modules/contrib/{Generic.cmake => UMA.cmake} (75%) rename python/tvm/relay/backend/contrib/{generic => uma}/__init__.py (93%) rename python/tvm/relay/backend/contrib/{generic/codegen.py => uma/lower.py} (98%) rename python/tvm/relay/backend/contrib/{generic => uma}/partitioner.py (97%) rename python/tvm/relay/backend/contrib/{generic/rb_npu/codegen.py => uma/rb_npu/lower.py} (85%) rename python/tvm/relay/backend/contrib/{generic => uma}/run.py (93%) rename python/tvm/relay/backend/contrib/{generic/ultra_trail/codegen.py => uma/ultra_trail/lower.py} (86%) rename python/tvm/relay/backend/contrib/{generic => uma}/ultra_trail/partitioner.py (88%) rename python/tvm/relay/backend/contrib/{generic => uma}/ultra_trail/passes.py (100%) rename python/tvm/relay/backend/contrib/{generic => uma}/ultra_trail/patterns.py (100%) rename python/tvm/relay/backend/contrib/{generic => uma}/ultra_trail/schedules.py (100%) rename python/tvm/relay/backend/contrib/{generic => uma}/ultra_trail/strategies.py (100%) rename python/tvm/relay/backend/contrib/{generic => uma}/utils.py (97%) rename src/relay/backend/contrib/{generic => uma}/codegen.cc (89%) rename src/relay/backend/contrib/{generic => uma}/targets.cc (77%) rename src/relay/backend/contrib/{generic => uma}/ultra_trail/tir_to_runtime.cc (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8dc03ee0f40e..b15fa9847e52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -497,6 +497,7 @@ 
include(cmake/modules/contrib/TensorRT.cmake) include(cmake/modules/contrib/VitisAI.cmake) include(cmake/modules/contrib/Verilator.cmake) include(cmake/modules/contrib/CLML.cmake) +include(cmake/modules/contrib/UMA.cmake) include(cmake/modules/Git.cmake) include(cmake/modules/LibInfo.cmake) include(cmake/modules/RustExt.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index 9c927ce05c0d..0a70f1daa86e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -380,3 +380,6 @@ set(SUMMARIZE OFF) # To enable pass the path to the root libtorch (or PyTorch) directory # OFF or /path/to/torch/ set(USE_LIBTORCH OFF) + +# Whether to use the Universal Modular Accelerator Interface +set(USE_UMA ON) diff --git a/cmake/modules/contrib/Generic.cmake b/cmake/modules/contrib/UMA.cmake similarity index 75% rename from cmake/modules/contrib/Generic.cmake rename to cmake/modules/contrib/UMA.cmake index 497556097ee4..8adf0c7efe3f 100644 --- a/cmake/modules/contrib/Generic.cmake +++ b/cmake/modules/contrib/UMA.cmake @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -if(USE_GENERIC) - file(GLOB COMPILER_GENERIC_SRCS - CONFIGURE_DEPENDS src/relay/backend/contrib/generic/* - CONFIGURE_DEPENDS src/relay/backend/contrib/generic/ultra_trail/*) - list(APPEND COMPILER_SRCS ${COMPILER_GENERIC_SRCS}) -endif(USE_GENERIC) +if(USE_UMA) + file(GLOB COMPILER_UMA_SRCS + CONFIGURE_DEPENDS src/relay/backend/contrib/uma/* + CONFIGURE_DEPENDS src/relay/backend/contrib/uma/ultra_trail/*) + list(APPEND COMPILER_SRCS ${COMPILER_UMA_SRCS}) +endif(USE_UMA) diff --git a/python/tvm/relay/backend/contrib/generic/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py similarity index 93% rename from python/tvm/relay/backend/contrib/generic/__init__.py rename to python/tvm/relay/backend/contrib/uma/__init__.py index 0f51b92b11cd..153a63142785 100644 --- a/python/tvm/relay/backend/contrib/generic/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/__init__.py @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. """Generic NPU codegen modules.""" -from .ultra_trail import codegen -from .rb_npu import codegen +from .ultra_trail import lower +from .rb_npu import lower diff --git a/python/tvm/relay/backend/contrib/generic/codegen.py b/python/tvm/relay/backend/contrib/uma/lower.py similarity index 98% rename from python/tvm/relay/backend/contrib/generic/codegen.py rename to python/tvm/relay/backend/contrib/uma/lower.py index 6bd9c69481b5..21c3efd5dd65 100644 --- a/python/tvm/relay/backend/contrib/generic/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/lower.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""Generic codegen for NPUs""" +"""Lowering base class of the Universal Modular Accelerator Interface (UMA)""" import tvm from tvm import relay, te, tir @@ -26,7 +26,7 @@ from .utils import extract_constants -class GenericCodegen(object): +class UMALower(object): def __init__(self) -> None: self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] diff --git a/python/tvm/relay/backend/contrib/generic/partitioner.py b/python/tvm/relay/backend/contrib/uma/partitioner.py similarity index 97% rename from python/tvm/relay/backend/contrib/generic/partitioner.py rename to python/tvm/relay/backend/contrib/uma/partitioner.py index 70a6947f5d37..ce8c918e5944 100644 --- a/python/tvm/relay/backend/contrib/generic/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/partitioner.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Generic relay partitioner for NPUs""" +"""Partitioner base class of the Universal Modular Accelerator Interface (UMA)""" from tvm.relay.build_module import bind_params_by_name from typing import Dict, List, Tuple, Optional @@ -25,7 +25,7 @@ from tvm.relay.op.contrib.register import register_pattern_table -class GenericPartitioner(object): +class UMAPartitioner(object): def __init__(self, variant: str = "") -> None: self._variant = variant diff --git a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py b/python/tvm/relay/backend/contrib/uma/rb_npu/lower.py similarity index 85% rename from python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py rename to python/tvm/relay/backend/contrib/uma/rb_npu/lower.py index 9326c792ffad..e6008f79df1d 100644 --- a/python/tvm/relay/backend/contrib/generic/rb_npu/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/rb_npu/lower.py @@ -18,12 +18,12 @@ import tvm from tvm import relay -from tvm.relay.backend.contrib.generic.codegen import GenericCodegen +from tvm.relay.backend.contrib.uma.lower import UMALower -class RBNPUCodegen(GenericCodegen): +class RBNPULower(UMALower): def __init__(self): - super(RBNPUCodegen, self).__init__() + super(RBNPULower, self).__init__() def _register_tir_schedules(self): pass @@ -32,7 +32,7 @@ def _register_tir_passes(self): pass -@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_rb_npu") +@tvm._ffi.register_func("relay.ext.uma.relay_to_tir_func_rb_npu") def relay_to_tir_func_rb_npu(ext_func: relay.Function) -> tvm.tir.PrimFunc: """ This is the hook for python-based lowering of relay function @@ -48,6 +48,6 @@ def relay_to_tir_func_rb_npu(ext_func: relay.Function) -> tvm.tir.PrimFunc: prim_func : tir.PrimFunc This returns the scheduled PrimFunc """ - codegen = RBNPUCodegen() + codegen = RBNPULower() prim_func = codegen.relay_to_tir_func(ext_func) return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/run.py b/python/tvm/relay/backend/contrib/uma/run.py similarity index 93% rename from python/tvm/relay/backend/contrib/generic/run.py rename to python/tvm/relay/backend/contrib/uma/run.py index ac21ea19a048..933cf1eaeb4e 100644 --- a/python/tvm/relay/backend/contrib/generic/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -1,6 +1,6 @@ import tvm -import tvm.relay.backend.contrib.generic -from tvm.relay.backend.contrib.generic.ultra_trail.partitioner import UltraTrailPartitioner +import tvm.relay.backend.contrib.uma +from 
tvm.relay.backend.contrib.uma.ultra_trail.partitioner import UltraTrailPartitioner from tvm import relay import torch diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py similarity index 86% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py index 6e5404cf7bd9..e6cd05c455c4 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py @@ -14,19 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Codegen for the UltraTrail accelerator""" +"""UMA lowering for the UltraTrail accelerator""" import tvm from tvm import relay -from ..codegen import GenericCodegen +from ..lower import UMALower from .strategies import * from .schedules import * from .passes import * -class UltraTrailCodegen(GenericCodegen): +class UltraTrailLower(UMALower): def __init__(self): - super(UltraTrailCodegen, self).__init__() + super(UltraTrailLower, self).__init__() def _register_operator_strategies(self): self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) @@ -39,7 +39,7 @@ def _register_tir_passes(self): self._register_tir_pass(0, CodegenGenerateConstants()) -@tvm._ffi.register_func("relay.ext.generic.relay_to_tir_func_ultra_trail") +@tvm._ffi.register_func("relay.ext.uma.relay_to_tir_func_ultra_trail") def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: """ This is the hook for python-based lowering of relay function @@ -55,6 +55,6 @@ def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: prim_func : tir.PrimFunc This returns the scheduled PrimFunc """ - codegen = UltraTrailCodegen() + codegen = UltraTrailLower() prim_func = codegen.relay_to_tir_func(ext_func) return prim_func diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py similarity index 88% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py index 64a7714b45bd..f5ec36a49425 100644 --- a/python/tvm/relay/backend/contrib/generic/ultra_trail/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py @@ -14,14 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""Relay partitioner for the UltraTrail accelerator""" +"""UMA partitioner for the UltraTrail accelerator""" from .passes import * from .patterns import * -from ..partitioner import GenericPartitioner +from ..partitioner import UMAPartitioner -class UltraTrailPartitioner(GenericPartitioner): +class UltraTrailPartitioner(UMAPartitioner): @property def target_name(self): return "ultra_trail" diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py similarity index 100% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/passes.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py similarity index 100% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/patterns.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py similarity index 100% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/schedules.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py diff --git a/python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py similarity index 100% rename from python/tvm/relay/backend/contrib/generic/ultra_trail/strategies.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py diff --git a/python/tvm/relay/backend/contrib/generic/utils.py b/python/tvm/relay/backend/contrib/uma/utils.py similarity index 97% rename from python/tvm/relay/backend/contrib/generic/utils.py rename to python/tvm/relay/backend/contrib/uma/utils.py index 3d27e3caaa12..3519d830ddd1 100644 --- a/python/tvm/relay/backend/contrib/generic/utils.py +++ b/python/tvm/relay/backend/contrib/uma/utils.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Utility methods""" +"""Utility methods for the Universal Modular Accelerator Interface (UMA)""" import tvm from tvm import relay diff --git a/src/relay/backend/contrib/generic/codegen.cc b/src/relay/backend/contrib/uma/codegen.cc similarity index 89% rename from src/relay/backend/contrib/generic/codegen.cc rename to src/relay/backend/contrib/uma/codegen.cc index 22995f9a3957..f9168ea08fa6 100644 --- a/src/relay/backend/contrib/generic/codegen.cc +++ b/src/relay/backend/contrib/uma/codegen.cc @@ -18,9 +18,9 @@ */ /*! - * \file relay/backend/contrib/generic/codegen.cc + * \file relay/backend/contrib/uma/codegen.cc * - * \brief this file contains the target hooks for generic scale4edge codegen. + * \brief this file contains the target hooks for the Universal Modular Accelerator Interface (UMA). */ #include @@ -40,13 +40,11 @@ namespace tvm { namespace relay { namespace contrib { -namespace generic { +namespace uma { /*! * \brief This mutator lowers each external * relay function to a TIR PrimFunc - * - * TODO: Just a slightly modified copy of Ethos-U. Needs refactoring for generic use-case. 
*/ class RelayToTIRMutator : public MixedModeMutator { public: @@ -75,7 +73,7 @@ class RelayToTIRMutator : public MixedModeMutator { auto codegen_name = func->GetAttr(attr::kCompiler); if (codegen_name.defined() && codegen_name == target_name_) { auto relay_to_tir_func_pf = - tvm::runtime::Registry::Get("relay.ext.generic.relay_to_tir_func_" + target_name_); + tvm::runtime::Registry::Get("relay.ext.uma.relay_to_tir_func_" + target_name_); ICHECK(relay_to_tir_func_pf); tir::PrimFunc prim_func = (*relay_to_tir_func_pf)(func); prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target(target_name_)); @@ -99,10 +97,10 @@ tvm::transform::Pass RelayToTIR(String target_name) { [=](IRModule ir_module, transform::PassContext pass_context) { return RelayToTIRMutator(ir_module, target_name)(); }; - return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.generic.RelayToTIR", {}); + return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.uma.RelayToTIR", {}); } -} // namespace generic +} // namespace uma } // namespace contrib } // namespace relay } // namespace tvm diff --git a/src/relay/backend/contrib/generic/targets.cc b/src/relay/backend/contrib/uma/targets.cc similarity index 77% rename from src/relay/backend/contrib/generic/targets.cc rename to src/relay/backend/contrib/uma/targets.cc index 23fa215d60b7..c2c067287ae1 100644 --- a/src/relay/backend/contrib/generic/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -18,9 +18,9 @@ */ /*! - * \file relay/backend/contrib/generic/codegen.cc + * \file relay/backend/contrib/uma/targets.cc * - * \brief this file contains the targets for generic scale4edge codegen. + * \brief this file contains the targets for the Universal Modular Accelerator Interface (UMA). */ #include @@ -30,20 +30,20 @@ namespace tvm { namespace relay { namespace contrib { -namespace generic { +namespace uma { tvm::transform::Pass RelayToTIR(String target_name); namespace ultra_trail { runtime::Module TIRToRuntime(IRModule mod, Target target); } // namespace ultra_trail -} // namespace generic +} // namespace uma } // namespace contrib } // namespace relay TVM_REGISTER_TARGET_KIND("ultra_trail", kDLCPU) - .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("ultra_trail")) - .set_attr("TIRToRuntime", relay::contrib::generic::ultra_trail::TIRToRuntime); + .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("ultra_trail")) + .set_attr("TIRToRuntime", relay::contrib::uma::ultra_trail::TIRToRuntime); TVM_REGISTER_TARGET_KIND("rb_npu", kDLCPU) - .set_attr("RelayToTIR", relay::contrib::generic::RelayToTIR("rb_npu")); + .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("rb_npu")); } // namespace tvm diff --git a/src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc b/src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc similarity index 98% rename from src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc rename to src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc index df0520b7ca63..aa3a0bf975bf 100644 --- a/src/relay/backend/contrib/generic/ultra_trail/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc @@ -31,7 +31,7 @@ namespace tvm { using namespace tir; namespace relay { namespace contrib { -namespace generic { +namespace uma { namespace ultra_trail { class CodeGenUltraTrail : public codegen::CodeGenCHost { @@ -68,7 +68,7 @@ runtime::Module TIRToRuntime(IRModule mod, Target target) { } } // namespace ultra_trail -} // namespace generic +} // namespace uma } // 
namespace contrib } // namespace relay } // namespace tvm From 3335e20f740ffc2dbac879dd03dc3118849700c5 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Wed, 9 Feb 2022 23:31:53 +0100 Subject: [PATCH 015/112] Change API to single user-facing backend class UMABackend --- .../tvm/relay/backend/contrib/uma/__init__.py | 19 --- .../tvm/relay/backend/contrib/uma/backend.py | 96 +++++++++++++++ python/tvm/relay/backend/contrib/uma/lower.py | 112 +++--------------- .../relay/backend/contrib/uma/partitioner.py | 91 +++----------- python/tvm/relay/backend/contrib/uma/run.py | 7 +- .../uma/ultra_trail/{lower.py => backend.py} | 49 +++----- .../contrib/uma/ultra_trail/partitioner.py | 34 ------ 7 files changed, 150 insertions(+), 258 deletions(-) delete mode 100644 python/tvm/relay/backend/contrib/uma/__init__.py create mode 100644 python/tvm/relay/backend/contrib/uma/backend.py rename python/tvm/relay/backend/contrib/uma/ultra_trail/{lower.py => backend.py} (55%) delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py diff --git a/python/tvm/relay/backend/contrib/uma/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py deleted file mode 100644 index 153a63142785..000000000000 --- a/python/tvm/relay/backend/contrib/uma/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Generic NPU codegen modules.""" -from .ultra_trail import lower -from .rb_npu import lower diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py new file mode 100644 index 000000000000..6535f0ddab4f --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Backend base class of the Universal Modular Accelerator Interface (UMA)""" + +import tvm + +from abc import abstractmethod +from typing import List, Dict, Callable, Optional + +from tvm.relay.backend.contrib.uma.partitioner import UMAPartitioner +from tvm.relay.backend.contrib.uma.lower import UMALower + + +class UMABackend(object): + def __init__(self, variant: str = "") -> None: + self.variant = variant + + self._relay_to_relay = UMAPartitioner(self.target_name, self.variant) + self._relay_to_tir = UMALower(self.target_name) + + @property + @abstractmethod + def target_name(self) -> str: + """Name of the hardware target. + + Returns + ------- + out : str + The hardware target name. + """ + + ############################################################################ + # Relay to Relay function registration + ############################################################################ + def _register_relay_pass(self, stage: int, relay_pass: tvm.transform.Pass) -> None: + self._relay_to_relay._relay_passes.append((stage, relay_pass)) + + def _register_pattern( + self, + name: str, + pattern: tvm.relay.dataflow_pattern.DFPattern, + variants: Optional[List[str]] = None, + ) -> None: + self._relay_to_relay._patterns.append((name, pattern, [] if variants is None else variants)) + + ############################################################################ + # Relay to TIR function registration + ############################################################################ + def _register_operator_strategy( + self, + op: str, + strat: Callable[ + [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], + tvm.relay.op.op.OpStrategy, + ], + plevel: Optional[int] = 11, + ) -> None: + self._relay_to_tir._operator_strategies.append((op, strat, plevel)) + + def _register_tir_schedule( + self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] + ) -> None: + self._relay_to_tir._tir_schedules.append(sch_func) + + def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: + self._relay_to_tir._tir_passes.append((stage, tir_pass)) + + ############################################################################ + # TIR to runtime function registration + ############################################################################ + + ############################################################################ + # Backend functions + ############################################################################ + def register(self) -> None: + self._relay_to_relay.register() + self._relay_to_tir.register() + + def partition( + self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None + ) -> tvm.IRModule: + return self._relay_to_relay.partition(mod, params) diff --git a/python/tvm/relay/backend/contrib/uma/lower.py b/python/tvm/relay/backend/contrib/uma/lower.py index 21c3efd5dd65..6b3e1bd3c159 100644 --- a/python/tvm/relay/backend/contrib/uma/lower.py +++ b/python/tvm/relay/backend/contrib/uma/lower.py @@ -18,110 +18,29 @@ import tvm from tvm import relay, te, tir -from tvm.relay.op import op as _op -from abc import abstractmethod from typing import List, Tuple, Callable, Optional from .utils import extract_constants class UMALower(object): - def __init__(self) -> None: + def __init__(self, target_name: str) -> None: + self.target_name = target_name + + self._operator_strategies: List[ + Tuple[ + str, + Callable[ + [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], + tvm.relay.op.op.OpStrategy, + ], + Optional[int], 
+ ] + ] = [] self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] - self._register_operator_strategies() - self._register_tir_schedules() - self._register_tir_passes() - - @abstractmethod - def _register_operator_strategies(self) -> None: - """Register a set of operator strategies which are considered during lowering from relay to TE. - - Example - ------- - Here is an example of how two operator strategies can be registered. - - .. code-block:: python - - def _register_operator_strategies(self): - self._register_operator_strategy(operator_strategy_0) - self._register_operator_strategy(operator_strategy_1) - - Use `pass` if no operator strategy should be registerd. - - .. code-block:: python - - def _register_operator_strategies(self): - pass - - """ - - @abstractmethod - def _register_tir_schedules(self) -> None: - """Register a set of TIR scheduling functions which are applied to the schedule. - - Example - ------- - Here is an example of how two scheduling functions can be registered. - - .. code-block:: python - - def _register_tir_schedules(self): - self._register_tir_schedule(schedule_func_0) - self._register_tir_schedule(schedule_func_1) - - Use `pass` if no scheduling function should be registerd. - - .. code-block:: python - - def _register_tir_schedules(self): - pass - - """ - - @abstractmethod - def _register_tir_passes(self) -> None: - """Register a set of TIR passes which are applied during lowering. - - Example - ------- - Here is an example of how two passes can be registered. - - .. code-block:: python - - def _register_tir_passes(self): - self._register_tir_pass(pass_0) - self._register_tir_pass(pass_1) - - Use `pass` if no TIR pass should be registerd. - - .. code-block:: python - - def _register_tir_passes(self): - pass - - """ - - def _register_operator_strategy( - self, - op: str, - strat: Callable[ - [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], _op.OpStrategy - ], - plevel: Optional[int] = 11, - ) -> None: - _op.register_strategy(op, strat, level=plevel) - - def _register_tir_schedule( - self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] - ) -> None: - self._tir_schedules.append(sch_func) - - def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: - self._tir_passes.append((stage, tir_pass)) - def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. @@ -190,3 +109,8 @@ def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: schedule = sch_func(schedule) prim_func = self._lower_stir_to_nstir(schedule) return prim_func + + def register(self) -> None: + tvm._ffi.register_func( + "relay.ext.uma.relay_to_tir_func_{}".format(self.target_name), self.relay_to_tir_func + ) diff --git a/python/tvm/relay/backend/contrib/uma/partitioner.py b/python/tvm/relay/backend/contrib/uma/partitioner.py index ce8c918e5944..c8187dfbe667 100644 --- a/python/tvm/relay/backend/contrib/uma/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/partitioner.py @@ -16,94 +16,34 @@ # under the License. 
"""Partitioner base class of the Universal Modular Accelerator Interface (UMA)""" -from tvm.relay.build_module import bind_params_by_name -from typing import Dict, List, Tuple, Optional import tvm from tvm import relay -from abc import abstractmethod - +from tvm.relay.build_module import bind_params_by_name from tvm.relay.op.contrib.register import register_pattern_table +from typing import Dict, List, Tuple, Optional class UMAPartitioner(object): - def __init__(self, variant: str = "") -> None: - self._variant = variant + def __init__(self, target_name: str, variant: str = "") -> None: + self.variant = variant + self.target_name = target_name self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, List[str]]] = [] - self._register_relay_passes() - self._register_patterns() - register_pattern_table(self.target_name, self._pattern_table) - - @property - @abstractmethod - def target_name(self) -> str: - """Name of the hardware target. - - Returns - ------- - out : str - The hardware target name. - """ - - @abstractmethod - def _register_relay_passes(self) -> None: - """Register a set of relay passes which are applied during lowering. - - Example - ------- - Here is an example of how two passes can be registered. - - .. code-block:: python - - def _register_relay_passes(self): - self._register_relay_pass(pass_0) - self._register_relay_pass(pass_1) - - Use `pass` if no relay pass should be registerd. - - .. code-block:: python - - def _register_relay_passes(self): - pass - - """ - - @abstractmethod - def _register_patterns(self) -> None: - """Register a set of relay graph patterns which used for partitioning. - - Example - ------- - Here is an example of how two patterns can be registered. - - .. code-block:: python - - def _register_patterns(self): - self._register_pattern(pattern_0) - self._register_pattern(pattern_1) - """ - - def _register_relay_pass(self, stage: int, relay_pass: tvm.transform.Pass) -> None: - self._relay_passes.append((stage, relay_pass)) - - def _register_pattern( - self, - name: str, - pattern: tvm.relay.dataflow_pattern.DFPattern, - variants: Optional[List[str]] = None, - ): - self._patterns.append((name, pattern, [] if variants is None else variants)) - def _pattern_table(self): return [ (self.target_name + "." + pattern[0], pattern[1]) for pattern in self._patterns - if self._variant in pattern[2] or not pattern[2] + if self.variant in pattern[2] or not pattern[2] ] - def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None) -> tvm.IRModule: + def register(self) -> None: + register_pattern_table(self.target_name, self._pattern_table) + + def partition( + self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None + ) -> tvm.IRModule: """Partition the relay graph in by the NPU supported and unsupported parts. 
Parameters @@ -120,10 +60,9 @@ def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDA if params: mod["main"] = bind_params_by_name(mod["main"], params) - pattern = relay.op.contrib.get_pattern_table(self.target_name) mod = relay.transform.InferType()(mod) mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 0])(mod) - mod = relay.transform.MergeComposite(pattern)(mod) + mod = relay.transform.MergeComposite(self._pattern_table())(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) @@ -133,9 +72,7 @@ def __call__(self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDA mod = relay.transform.InferType()(mod) # Defunctionalize the partitioned functions to allow lowering for gv, func in mod.functions.items(): - mod.update_func( - gv, relay.transform.Defunctionalization(func, mod) - ) + mod.update_func(gv, relay.transform.Defunctionalization(func, mod)) mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 2])(mod) return mod diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py index 933cf1eaeb4e..a5148eb08717 100644 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -1,7 +1,6 @@ import tvm -import tvm.relay.backend.contrib.uma -from tvm.relay.backend.contrib.uma.ultra_trail.partitioner import UltraTrailPartitioner from tvm import relay +from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend import torch import tarfile @@ -39,8 +38,8 @@ def main(): mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) # Relay target specific partitioning - # mod = UltraTrailPartitioner()(mod, params) - mod = UltraTrailPartitioner()(mod) + UltraTrailBackend().register() + mod = UltraTrailBackend().partition(mod) # Relay build (AOT C target) TARGET = tvm.target.Target("c") diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py similarity index 55% rename from python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py rename to python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index e6cd05c455c4..eb5b3f35408b 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/lower.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -14,47 +14,36 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""UMA lowering for the UltraTrail accelerator""" +"""UMA backend for the UltraTrail accelerator""" -import tvm -from tvm import relay -from ..lower import UMALower +from ..backend import UMABackend from .strategies import * from .schedules import * from .passes import * +from .patterns import * - -class UltraTrailLower(UMALower): +class UltraTrailBackend(UMABackend): def __init__(self): - super(UltraTrailLower, self).__init__() + super(UltraTrailBackend, self).__init__() + + ####################################################################### + # Relay to Relay function registration + ####################################################################### + self._register_pattern("conv1d_relu", conv1d_relu_pattern()) - def _register_operator_strategies(self): + self._register_relay_pass(1, ConfigGenerator()) + self._register_relay_pass(2, BufferScopeAnnotator()) + + ####################################################################### + # Relay to TIR function registration + ####################################################################### self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) - def _register_tir_schedules(self): self._register_tir_schedule(insert_extern_calls) - def _register_tir_passes(self): self._register_tir_pass(0, CodegenGenerateConfig()) self._register_tir_pass(0, CodegenGenerateConstants()) - -@tvm._ffi.register_func("relay.ext.uma.relay_to_tir_func_ultra_trail") -def relay_to_tir_func_ultra_trail(ext_func: relay.Function) -> tvm.tir.PrimFunc: - """ - This is the hook for python-based lowering of relay function - that gets offloaded to the UltraTrail accelerator. - - Parameters - ---------- - ext_func : relay.Function - This is the partitioned relay function - - Returns - ------- - prim_func : tir.PrimFunc - This returns the scheduled PrimFunc - """ - codegen = UltraTrailLower() - prim_func = codegen.relay_to_tir_func(ext_func) - return prim_func + @property + def target_name(self): + return "ultra_trail" diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py deleted file mode 100644 index f5ec36a49425..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/partitioner.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""UMA partitioner for the UltraTrail accelerator""" - -from .passes import * -from .patterns import * -from ..partitioner import UMAPartitioner - - -class UltraTrailPartitioner(UMAPartitioner): - @property - def target_name(self): - return "ultra_trail" - - def _register_patterns(self): - self._register_pattern("conv1d_relu", conv1d_relu_pattern()) - - def _register_relay_passes(self): - self._register_relay_pass(1, ConfigGenerator()) - self._register_relay_pass(2, BufferScopeAnnotator()) From 03398f9bf9b9ade85472569789397482d551aa01 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Tue, 1 Mar 2022 22:23:56 +0100 Subject: [PATCH 016/112] Add initial codegen API --- cmake/modules/contrib/UMA.cmake | 3 +- .../tvm/relay/backend/contrib/uma/backend.py | 5 ++ .../tvm/relay/backend/contrib/uma/codegen.py | 49 +++++++++++++++++++ python/tvm/relay/backend/contrib/uma/run.py | 5 +- .../contrib/uma/ultra_trail/backend.py | 15 ++++++ .../uma/{codegen.cc => relay_to_tir.cc} | 0 src/relay/backend/contrib/uma/targets.cc | 4 +- .../uma/{ultra_trail => }/tir_to_runtime.cc | 19 ++++--- 8 files changed, 83 insertions(+), 17 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/codegen.py rename src/relay/backend/contrib/uma/{codegen.cc => relay_to_tir.cc} (100%) rename src/relay/backend/contrib/uma/{ultra_trail => }/tir_to_runtime.cc (82%) diff --git a/cmake/modules/contrib/UMA.cmake b/cmake/modules/contrib/UMA.cmake index 8adf0c7efe3f..1d3a9a30ec0f 100644 --- a/cmake/modules/contrib/UMA.cmake +++ b/cmake/modules/contrib/UMA.cmake @@ -17,7 +17,6 @@ if(USE_UMA) file(GLOB COMPILER_UMA_SRCS - CONFIGURE_DEPENDS src/relay/backend/contrib/uma/* - CONFIGURE_DEPENDS src/relay/backend/contrib/uma/ultra_trail/*) + CONFIGURE_DEPENDS src/relay/backend/contrib/uma/*) list(APPEND COMPILER_SRCS ${COMPILER_UMA_SRCS}) endif(USE_UMA) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 6535f0ddab4f..a4ee1f4ec937 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -23,6 +23,7 @@ from tvm.relay.backend.contrib.uma.partitioner import UMAPartitioner from tvm.relay.backend.contrib.uma.lower import UMALower +from tvm.relay.backend.contrib.uma.codegen import UMACodegen class UMABackend(object): @@ -31,6 +32,7 @@ def __init__(self, variant: str = "") -> None: self._relay_to_relay = UMAPartitioner(self.target_name, self.variant) self._relay_to_tir = UMALower(self.target_name) + self._tir_to_runtime = UMACodegen(self.target_name) @property @abstractmethod @@ -82,6 +84,8 @@ def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPas ############################################################################ # TIR to runtime function registration ############################################################################ + def _register_codegen(self, fmt: str = "c", **kwargs) -> None: + self._tir_to_runtime._register_codegen(fmt, **kwargs) ############################################################################ # Backend functions @@ -89,6 +93,7 @@ def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPas def register(self) -> None: self._relay_to_relay.register() self._relay_to_tir.register() + self._tir_to_runtime.register() def partition( self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None diff --git a/python/tvm/relay/backend/contrib/uma/codegen.py b/python/tvm/relay/backend/contrib/uma/codegen.py new 
file mode 100644 index 000000000000..ab3a3731c724 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/codegen.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Codegen base class of the Universal Modular Accelerator Interface (UMA)""" + +import tvm + +from typing import Callable + + +class UMACodegen(object): + def __init__(self, target_name: str) -> None: + self.target_name = target_name + + def _register_codegen(self, fmt: str = "c", **kwargs) -> None: + if fmt == "c": + self._register_c_codegen(**kwargs) + else: + raise RuntimeError(f'Unsupported codegen format "{fmt}"') + + def _register_c_codegen( + self, includes: Callable, constants: Callable, replace_call_extern: Callable + ) -> None: + tvm._ffi.register_func( + "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes + ) + tvm._ffi.register_func( + "relay.ext.uma.codegen_c_constants_{}".format(self.target_name), constants + ) + tvm._ffi.register_func( + "relay.ext.uma.codegen_c_replace_call_extern_{}".format(self.target_name), + replace_call_extern, + ) + + def register(self) -> None: + pass diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py index a5148eb08717..550e05ade818 100644 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -38,8 +38,9 @@ def main(): mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) # Relay target specific partitioning - UltraTrailBackend().register() - mod = UltraTrailBackend().partition(mod) + ut_backend = UltraTrailBackend() + ut_backend.register() + mod = ut_backend.partition(mod) # Relay build (AOT C target) TARGET = tvm.target.Target("c") diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index eb5b3f35408b..ea51639d7463 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -22,6 +22,7 @@ from .passes import * from .patterns import * + class UltraTrailBackend(UMABackend): def __init__(self): super(UltraTrailBackend, self).__init__() @@ -44,6 +45,20 @@ def __init__(self): self._register_tir_pass(0, CodegenGenerateConfig()) self._register_tir_pass(0, CodegenGenerateConstants()) + ####################################################################### + # TIR to runtime function registration + ####################################################################### + self._register_codegen( + fmt="c", includes=self.gen_includes, constants=None, replace_call_extern=None + ) + + def gen_includes(self) -> str: + includes = "" + includes += "#include \n" + includes += "#include \n" + includes += "#include \n" + return 
includes + @property def target_name(self): return "ultra_trail" diff --git a/src/relay/backend/contrib/uma/codegen.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc similarity index 100% rename from src/relay/backend/contrib/uma/codegen.cc rename to src/relay/backend/contrib/uma/relay_to_tir.cc diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index c2c067287ae1..f99375b18d15 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -32,16 +32,14 @@ namespace relay { namespace contrib { namespace uma { tvm::transform::Pass RelayToTIR(String target_name); -namespace ultra_trail { runtime::Module TIRToRuntime(IRModule mod, Target target); -} // namespace ultra_trail } // namespace uma } // namespace contrib } // namespace relay TVM_REGISTER_TARGET_KIND("ultra_trail", kDLCPU) .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("ultra_trail")) - .set_attr("TIRToRuntime", relay::contrib::uma::ultra_trail::TIRToRuntime); + .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); TVM_REGISTER_TARGET_KIND("rb_npu", kDLCPU) .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("rb_npu")); diff --git a/src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc similarity index 82% rename from src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc rename to src/relay/backend/contrib/uma/tir_to_runtime.cc index aa3a0bf975bf..ae90cc48a8a6 100644 --- a/src/relay/backend/contrib/uma/ultra_trail/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -23,23 +23,23 @@ #include #include -#include "../../../../../runtime/file_utils.h" -#include "../../../../../target/source/codegen_c.h" -#include "../../../../../target/source/codegen_c_host.h" +#include "../../../../runtime/file_utils.h" +#include "../../../../target/source/codegen_c.h" +#include "../../../../target/source/codegen_c_host.h" namespace tvm { using namespace tir; namespace relay { namespace contrib { namespace uma { -namespace ultra_trail { -class CodeGenUltraTrail : public codegen::CodeGenCHost { +class UMACodegen : public codegen::CodeGenCHost { public: void Init(bool output_ssa, bool emit_asserts, std::string target_str) { - decl_stream << "#include \n"; - decl_stream << "#include \n"; - decl_stream << "#include \n"; + auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str); + ICHECK(includes_pf); + String includes = (*includes_pf)(); + decl_stream << includes; CodeGenCHost::Init(output_ssa, emit_asserts, target_str); } @@ -54,7 +54,7 @@ class CodeGenUltraTrail : public codegen::CodeGenCHost { runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; bool emit_asserts = false; - CodeGenUltraTrail codegen; + UMACodegen codegen; Array function_names; codegen.Init(output_ssa, emit_asserts, target->str()); for (auto kv : mod->functions) { @@ -67,7 +67,6 @@ runtime::Module TIRToRuntime(IRModule mod, Target target) { return codegen::CSourceModuleCreate(code, "c", function_names); } -} // namespace ultra_trail } // namespace uma } // namespace contrib } // namespace relay From 6cd220e67449f2e1e89f28f5cac13fa0765c4c3d Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Mon, 14 Mar 2022 17:11:17 +0100 Subject: [PATCH 017/112] [UMA] add a generic packed function to register targets --- .../tvm/relay/backend/contrib/uma/backend.py | 5 ++++- src/relay/backend/contrib/uma/targets.cc | 21 +++++++++++++------ 2 
files changed, 19 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index a4ee1f4ec937..86a4711dfa88 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -25,7 +25,6 @@ from tvm.relay.backend.contrib.uma.lower import UMALower from tvm.relay.backend.contrib.uma.codegen import UMACodegen - class UMABackend(object): def __init__(self, variant: str = "") -> None: self.variant = variant @@ -91,6 +90,10 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: # Backend functions ############################################################################ def register(self) -> None: + + registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") + registration_func(self.target_name) + self._relay_to_relay.register() self._relay_to_tir.register() self._tir_to_runtime.register() diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index f99375b18d15..812943f9fa94 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -37,11 +37,20 @@ namespace uma { } // namespace contrib } // namespace relay -TVM_REGISTER_TARGET_KIND("ultra_trail", kDLCPU) - .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("ultra_trail")) - .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); - -TVM_REGISTER_TARGET_KIND("rb_npu", kDLCPU) - .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR("rb_npu")); +TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") + .set_body_typed([](String target_name){ + ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) + .set_name() + .set_device_type(kDLCPU) + .add_attr_option>("keys") + .add_attr_option("tag") + .add_attr_option("device") + .add_attr_option("model") + .add_attr_option>("libs") + .add_attr_option("host") + .add_attr_option("from_device") + .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) + .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); + }); } // namespace tvm From 0b6cccdeb3c3b4b5c953dc14489e888ad8621381 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 14 Mar 2022 18:30:06 +0100 Subject: [PATCH 018/112] Restructure files and add initial codegen --- .../backend/contrib/uma/{ => api}/codegen.py | 0 .../backend/contrib/uma/{ => api}/lower.py | 0 .../contrib/uma/{ => api}/partitioner.py | 0 .../backend/contrib/uma/{ => api}/utils.py | 0 .../tvm/relay/backend/contrib/uma/backend.py | 6 ++--- .../contrib/uma/ultra_trail/backend.py | 10 ++------ .../contrib/uma/ultra_trail/codegen.py | 25 +++++++++++++++++++ 7 files changed, 30 insertions(+), 11 deletions(-) rename python/tvm/relay/backend/contrib/uma/{ => api}/codegen.py (100%) rename python/tvm/relay/backend/contrib/uma/{ => api}/lower.py (100%) rename python/tvm/relay/backend/contrib/uma/{ => api}/partitioner.py (100%) rename python/tvm/relay/backend/contrib/uma/{ => api}/utils.py (100%) create mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py diff --git a/python/tvm/relay/backend/contrib/uma/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/codegen.py rename to python/tvm/relay/backend/contrib/uma/api/codegen.py diff --git a/python/tvm/relay/backend/contrib/uma/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py similarity index 100% rename from 
python/tvm/relay/backend/contrib/uma/lower.py rename to python/tvm/relay/backend/contrib/uma/api/lower.py diff --git a/python/tvm/relay/backend/contrib/uma/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/partitioner.py rename to python/tvm/relay/backend/contrib/uma/api/partitioner.py diff --git a/python/tvm/relay/backend/contrib/uma/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/utils.py rename to python/tvm/relay/backend/contrib/uma/api/utils.py diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 86a4711dfa88..5da4be6894e8 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -21,9 +21,9 @@ from abc import abstractmethod from typing import List, Dict, Callable, Optional -from tvm.relay.backend.contrib.uma.partitioner import UMAPartitioner -from tvm.relay.backend.contrib.uma.lower import UMALower -from tvm.relay.backend.contrib.uma.codegen import UMACodegen +from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner +from tvm.relay.backend.contrib.uma.api.lower import UMALower +from tvm.relay.backend.contrib.uma.api.codegen import UMACodegen class UMABackend(object): def __init__(self, variant: str = "") -> None: diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index ea51639d7463..013e841a1d1e 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -21,6 +21,7 @@ from .schedules import * from .passes import * from .patterns import * +from .codegen import gen_includes class UltraTrailBackend(UMABackend): @@ -49,16 +50,9 @@ def __init__(self): # TIR to runtime function registration ####################################################################### self._register_codegen( - fmt="c", includes=self.gen_includes, constants=None, replace_call_extern=None + fmt="c", includes=gen_includes, constants=None, replace_call_extern=None ) - def gen_includes(self) -> str: - includes = "" - includes += "#include \n" - includes += "#include \n" - includes += "#include \n" - return includes - @property def target_name(self): return "ultra_trail" diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py new file mode 100644 index 000000000000..836c94391213 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""UMA codegen for the UltraTrail accelerator""" + + +def gen_includes() -> str: + includes = "" + includes += "#include \n" + includes += "#include \n" + includes += "#include \n" + return includes From bd44fa88d056a6fa7b2aadd8152b769aff5a03f8 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Tue, 15 Mar 2022 10:21:32 +0100 Subject: [PATCH 019/112] Minor code cleanup --- python/tvm/relay/backend/contrib/uma/backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 5da4be6894e8..f4e07e5c09ce 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -18,7 +18,7 @@ import tvm -from abc import abstractmethod +from abc import abstractmethod, abstractproperty from typing import List, Dict, Callable, Optional from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner @@ -33,8 +33,7 @@ def __init__(self, variant: str = "") -> None: self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) - @property - @abstractmethod + @abstractproperty def target_name(self) -> str: """Name of the hardware target. @@ -43,6 +42,7 @@ def target_name(self) -> str: out : str The hardware target name. """ + ... ############################################################################ # Relay to Relay function registration From bf641ee943c05f028d388c1fb0b2af596dd8c374 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 17 Mar 2022 12:31:46 +0100 Subject: [PATCH 020/112] Add UMA config and MergeCompilerRegion example --- .../relay/backend/contrib/uma/api/codegen.py | 5 ++++ .../relay/backend/contrib/uma/api/lower.py | 6 ++++- .../backend/contrib/uma/api/partitioner.py | 9 ++++++- .../relay/backend/contrib/uma/api/utils.py | 25 ++++++++++++++++++- .../tvm/relay/backend/contrib/uma/backend.py | 9 +++++++ .../contrib/uma/ultra_trail/backend.py | 5 ++++ 6 files changed, 56 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index ab3a3731c724..d3e6c5abbed0 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -19,11 +19,16 @@ import tvm from typing import Callable +from .utils import check_config class UMACodegen(object): def __init__(self, target_name: str) -> None: self.target_name = target_name + self.config = {} + + def _register_config(self, config: dict) -> None: + self.config.update(check_config(config, self.config)) def _register_codegen(self, fmt: str = "c", **kwargs) -> None: if fmt == "c": diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 6b3e1bd3c159..b79c917bf6d8 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -21,12 +21,13 @@ from typing import List, Tuple, Callable, Optional -from .utils import extract_constants +from .utils import extract_constants, check_config class UMALower(object): def __init__(self, target_name: str) -> None: self.target_name = target_name + self.config = {} self._operator_strategies: List[ Tuple[ @@ -41,6 +42,9 @@ def __init__(self, target_name: str) -> None: self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] + def 
_register_config(self, config: dict) -> None: + self.config.update(check_config(config, self.config)) + def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index c8187dfbe667..20a7f428415b 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -22,15 +22,21 @@ from tvm.relay.op.contrib.register import register_pattern_table from typing import Dict, List, Tuple, Optional +from .utils import check_config + class UMAPartitioner(object): def __init__(self, target_name: str, variant: str = "") -> None: self.variant = variant self.target_name = target_name + self.config = {"enable_MergeCompilerRegion": True} self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, List[str]]] = [] + def _register_config(self, config: dict) -> None: + self.config.update(check_config(config, self.config)) + def _pattern_table(self): return [ (self.target_name + "." + pattern[0], pattern[1]) @@ -64,7 +70,8 @@ def partition( mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 0])(mod) mod = relay.transform.MergeComposite(self._pattern_table())(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) - mod = relay.transform.MergeCompilerRegions()(mod) + if self.config["enable_MergeCompilerRegion"]: + mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) mod = relay.transform.InferType()(mod) diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index 3519d830ddd1..d2b217255595 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -20,6 +20,12 @@ from tvm import relay from tvm.relay.expr_functor import ExprMutator +from enum import Enum + + +############################## +# Extract constants workaround +############################## class ExtractConstants(ExprMutator): """The actual mutator pass to extract the constants from a function and replace them with Vars so the function can be lowered to a TE graph. 
Additionally returns all the values of @@ -27,7 +33,7 @@ class ExtractConstants(ExprMutator): def __init__(self): super().__init__() - self.constants = {} + self.constants = {} self.const_vars = [] def visit_constant(self, const): @@ -70,3 +76,20 @@ def extract_constants(func): func.attrs["global_symbol"] ] return new_func, consts + + +################################### +# API configuration parameter check +################################### +class UMAConfigStatus(Enum): + UNSUPPORTED = 0 + DEPRECATED = 1 + +def check_config(new_config, ref_config): + for param in new_config.keys(): + status = ref_config.get(param, UMAConfigStatus.UNSUPPORTED) + if status == UMAConfigStatus.UNSUPPORTED: + raise KeyError(f"\"{param}\" is not a supported UMA configuration parameter.") + elif status == UMAConfigStatus.DEPRECATED: + raise DeprecationWarning(f"{param} is deprecated.") + return new_config diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index f4e07e5c09ce..7cacff8c95df 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -25,6 +25,7 @@ from tvm.relay.backend.contrib.uma.api.lower import UMALower from tvm.relay.backend.contrib.uma.api.codegen import UMACodegen + class UMABackend(object): def __init__(self, variant: str = "") -> None: self.variant = variant @@ -44,6 +45,14 @@ def target_name(self) -> str: """ ... + ############################################################################ + # Configuration registration + ############################################################################ + def _register_config(self, config: dict) -> None: + self._relay_to_relay._register_config(config.get("UMAPartitioner", {})) + self._relay_to_tir._register_config(config.get("UMALower", {})) + self._tir_to_runtime._register_config(config.get("UMACodegen", {})) + ############################################################################ # Relay to Relay function registration ############################################################################ diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index 013e841a1d1e..fa2be8344411 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -28,6 +28,11 @@ class UltraTrailBackend(UMABackend): def __init__(self): super(UltraTrailBackend, self).__init__() + ############################################################################ + # Configuration registration + ############################################################################ + self._register_config({"UMAPartitioner": {"enable_MergeCompilerRegion": True}}) + ####################################################################### # Relay to Relay function registration ####################################################################### From 222cbb65e104d63b2d9f0dc6b604f525eb62527e Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 17 Mar 2022 16:42:45 +0100 Subject: [PATCH 021/112] Move UMA configuration to init parameters --- .../relay/backend/contrib/uma/api/codegen.py | 5 ----- .../relay/backend/contrib/uma/api/lower.py | 6 +----- .../backend/contrib/uma/api/partitioner.py | 14 ++++++-------- .../relay/backend/contrib/uma/api/utils.py | 17 ----------------- .../tvm/relay/backend/contrib/uma/backend.py | 19 +++++-------------- .../contrib/uma/ultra_trail/backend.py | 5 ----- 6 
files changed, 12 insertions(+), 54 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index d3e6c5abbed0..ab3a3731c724 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -19,16 +19,11 @@ import tvm from typing import Callable -from .utils import check_config class UMACodegen(object): def __init__(self, target_name: str) -> None: self.target_name = target_name - self.config = {} - - def _register_config(self, config: dict) -> None: - self.config.update(check_config(config, self.config)) def _register_codegen(self, fmt: str = "c", **kwargs) -> None: if fmt == "c": diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index b79c917bf6d8..6b3e1bd3c159 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -21,13 +21,12 @@ from typing import List, Tuple, Callable, Optional -from .utils import extract_constants, check_config +from .utils import extract_constants class UMALower(object): def __init__(self, target_name: str) -> None: self.target_name = target_name - self.config = {} self._operator_strategies: List[ Tuple[ @@ -42,9 +41,6 @@ def __init__(self, target_name: str) -> None: self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] - def _register_config(self, config: dict) -> None: - self.config.update(check_config(config, self.config)) - def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 20a7f428415b..0c7640a77990 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -22,21 +22,19 @@ from tvm.relay.op.contrib.register import register_pattern_table from typing import Dict, List, Tuple, Optional -from .utils import check_config class UMAPartitioner(object): - def __init__(self, target_name: str, variant: str = "") -> None: - self.variant = variant + def __init__( + self, target_name: str, variant: str = "", merge_compiler_regions: bool = True + ) -> None: self.target_name = target_name - self.config = {"enable_MergeCompilerRegion": True} + self.variant = variant + self.merge_compiler_regions = merge_compiler_regions self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, List[str]]] = [] - def _register_config(self, config: dict) -> None: - self.config.update(check_config(config, self.config)) - def _pattern_table(self): return [ (self.target_name + "." 
+ pattern[0], pattern[1]) @@ -70,7 +68,7 @@ def partition( mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 0])(mod) mod = relay.transform.MergeComposite(self._pattern_table())(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) - if self.config["enable_MergeCompilerRegion"]: + if self.merge_compiler_regions: mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index d2b217255595..d8b056fd7061 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -76,20 +76,3 @@ def extract_constants(func): func.attrs["global_symbol"] ] return new_func, consts - - -################################### -# API configuration parameter check -################################### -class UMAConfigStatus(Enum): - UNSUPPORTED = 0 - DEPRECATED = 1 - -def check_config(new_config, ref_config): - for param in new_config.keys(): - status = ref_config.get(param, UMAConfigStatus.UNSUPPORTED) - if status == UMAConfigStatus.UNSUPPORTED: - raise KeyError(f"\"{param}\" is not a supported UMA configuration parameter.") - elif status == UMAConfigStatus.DEPRECATED: - raise DeprecationWarning(f"{param} is deprecated.") - return new_config diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 7cacff8c95df..61bb8043842f 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -18,7 +18,7 @@ import tvm -from abc import abstractmethod, abstractproperty +from abc import abstractmethod from typing import List, Dict, Callable, Optional from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner @@ -27,14 +27,13 @@ class UMABackend(object): - def __init__(self, variant: str = "") -> None: - self.variant = variant - - self._relay_to_relay = UMAPartitioner(self.target_name, self.variant) + def __init__(self, variant: str = "", merge_compiler_region: bool = True) -> None: + self._relay_to_relay = UMAPartitioner(self.target_name, variant, merge_compiler_region) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) - @abstractproperty + @property + @abstractmethod def target_name(self) -> str: """Name of the hardware target. @@ -45,14 +44,6 @@ def target_name(self) -> str: """ ... 
- ############################################################################ - # Configuration registration - ############################################################################ - def _register_config(self, config: dict) -> None: - self._relay_to_relay._register_config(config.get("UMAPartitioner", {})) - self._relay_to_tir._register_config(config.get("UMALower", {})) - self._tir_to_runtime._register_config(config.get("UMACodegen", {})) - ############################################################################ # Relay to Relay function registration ############################################################################ diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index fa2be8344411..013e841a1d1e 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -28,11 +28,6 @@ class UltraTrailBackend(UMABackend): def __init__(self): super(UltraTrailBackend, self).__init__() - ############################################################################ - # Configuration registration - ############################################################################ - self._register_config({"UMAPartitioner": {"enable_MergeCompilerRegion": True}}) - ####################################################################### # Relay to Relay function registration ####################################################################### From 54d7c52cd06321b09f3758032cc650f583b13125 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 21 Mar 2022 11:52:24 +0100 Subject: [PATCH 022/112] Add python hooks for C-codegen. Still has known restrictons --- .../relay/backend/contrib/uma/api/codegen.py | 7 ++-- .../contrib/uma/ultra_trail/backend.py | 4 +-- .../contrib/uma/ultra_trail/codegen.py | 6 ++++ .../backend/contrib/uma/tir_to_runtime.cc | 36 ++++++++++++++++--- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index ab3a3731c724..8e7f6ccbfd94 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -32,14 +32,13 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: raise RuntimeError(f'Unsupported codegen format "{fmt}"') def _register_c_codegen( - self, includes: Callable, constants: Callable, replace_call_extern: Callable + self, + includes: Callable[[], str], + replace_call_extern: Callable[[tvm.ir.container.Array], str], ) -> None: tvm._ffi.register_func( "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes ) - tvm._ffi.register_func( - "relay.ext.uma.codegen_c_constants_{}".format(self.target_name), constants - ) tvm._ffi.register_func( "relay.ext.uma.codegen_c_replace_call_extern_{}".format(self.target_name), replace_call_extern, diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index 013e841a1d1e..f05d3adb4d0a 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -21,7 +21,7 @@ from .schedules import * from .passes import * from .patterns import * -from .codegen import gen_includes +from .codegen import * class UltraTrailBackend(UMABackend): @@ -50,7 +50,7 @@ def __init__(self): # TIR to runtime function registration 
####################################################################### self._register_codegen( - fmt="c", includes=gen_includes, constants=None, replace_call_extern=None + fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern ) @property diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py index 836c94391213..1468b7953e59 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py @@ -16,6 +16,8 @@ # under the License. """UMA codegen for the UltraTrail accelerator""" +import tvm + def gen_includes() -> str: includes = "" @@ -23,3 +25,7 @@ def gen_includes() -> str: includes += "#include \n" includes += "#include \n" return includes + + +def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: + return "my_custom_api_function({}, {}, {})".format(*args) diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index ae90cc48a8a6..b578b98a20ae 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -35,12 +35,14 @@ namespace uma { class UMACodegen : public codegen::CodeGenCHost { public: - void Init(bool output_ssa, bool emit_asserts, std::string target_str) { - auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str); + UMACodegen(String target_str) : target_str_(target_str) {} + + void Init(bool output_ssa, bool emit_asserts) { + auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_); ICHECK(includes_pf); String includes = (*includes_pf)(); decl_stream << includes; - CodeGenCHost::Init(output_ssa, emit_asserts, target_str); + CodeGenCHost::Init(output_ssa, emit_asserts, target_str_); } /*! @@ -49,14 +51,38 @@ class UMACodegen : public codegen::CodeGenCHost { * \return string of code that offloads a subgraph to the Cortex-M */ void AddFunction(const PrimFunc& prim_func) { CodeGenC::AddFunction(prim_func); } + + private: + String target_str_; + + using codegen::CodeGenCHost::VisitStmt_; + + /*! * \brief Emits target specific APIs for every call_extern */ + void VisitExpr_(const CallNode* op, std::ostream& os) final { + if (!op->op.same_as(builtin::call_extern())) { + CodeGenCHost::VisitExpr_(op, os); + return; + } + auto replace_call_extern_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_replace_call_extern_" + target_str_); + if (replace_call_extern_pf == nullptr) { + CodeGenCHost::VisitExpr_(op, os); + } else { + // TODO: + // - funtion type (void) still gets printed before CallNode if extern call is wrapped in EvaluateNode + // - VarNode arguments might have "wrong" name_hints. 
The correct variable name is determined in C++ through GetVarID + String api_string = (*replace_call_extern_pf)(op->args); + os << api_string; + } + return; + } }; runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; bool emit_asserts = false; - UMACodegen codegen; + UMACodegen codegen (target->str()); Array function_names; - codegen.Init(output_ssa, emit_asserts, target->str()); + codegen.Init(output_ssa, emit_asserts); for (auto kv : mod->functions) { auto prim_func = Downcast(kv.second); auto global_symbol = prim_func->GetAttr(tvm::attr::kGlobalSymbol); From 67940e1176501c541b9b9a6918ea98c6a0cad60c Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 24 Mar 2022 16:19:51 +0100 Subject: [PATCH 023/112] Fix relay_to_tir hook to keep virtual device in main function --- src/relay/backend/contrib/uma/relay_to_tir.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index f9168ea08fa6..55b17ccb0fea 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -57,9 +57,7 @@ class RelayToTIRMutator : public MixedModeMutator { Function main_func = Downcast(ir_module_->Lookup(main_global_var)); // Copy everything across and mutate the body - Function mutated_main = - Function(main_func->params, VisitExpr(main_func->body), main_func->ret_type, - main_func->type_params, main_func->attrs, main_func->span); + Function mutated_main = WithFields(main_func, main_func->params, VisitExpr(main_func->body)); ir_module_->Update(main_global_var, mutated_main); From 83476e1cfc883880bb04395b5782eb116733bb86 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 25 Mar 2022 12:45:02 +0100 Subject: [PATCH 024/112] Remove register schedules, scheduling is moved to passes for now --- .../relay/backend/contrib/uma/api/lower.py | 29 +++++++------ .../tvm/relay/backend/contrib/uma/backend.py | 6 --- .../contrib/uma/ultra_trail/backend.py | 6 +-- .../backend/contrib/uma/ultra_trail/passes.py | 35 ++++++++++----- .../contrib/uma/ultra_trail/schedules.py | 43 ------------------- 5 files changed, 41 insertions(+), 78 deletions(-) delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 6b3e1bd3c159..5693dd8b2210 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -17,7 +17,7 @@ """Lowering base class of the Universal Modular Accelerator Interface (UMA)""" import tvm -from tvm import relay, te, tir +from tvm import relay, te from typing import List, Tuple, Callable, Optional @@ -38,7 +38,6 @@ def __init__(self, target_name: str) -> None: Optional[int], ] ] = [] - self._tir_schedules: List[Callable[[tvm.tir.Schedule], tvm.tir.Schedule]] = [] self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: @@ -66,13 +65,13 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func - def _lower_stir_to_nstir(self, schedule: tvm.tir.Schedule) -> tvm.tir.PrimFunc: - """Lower a S-TIR schedule to a NS-TIR primitive function. 
+ def _lower_stir_to_nstir(self, prim_func: tvm.tir.PrimFunc) -> tvm.tir.PrimFunc: + """Lower a S-TIR primitive function to a NS-TIR primitive function. Parameters ---------- - schedule : tvm.tir.Schedule - The schedule to lower. + prim_func : tvm.tir.PrimFunc + The primitive function to lower. Returns ------- @@ -80,11 +79,18 @@ def _lower_stir_to_nstir(self, schedule: tvm.tir.Schedule) -> tvm.tir.PrimFunc: The lowered non-schedulable TensorIR primitive function. """ + curr_ctxt = tvm.transform.PassContext().current() + assert "tir.add_lower_pass" not in curr_ctxt.config + with tvm.transform.PassContext( - config={"tir.add_lower_pass": self._tir_passes}, opt_level=0 + opt_level=curr_ctxt.opt_level, + required_pass=curr_ctxt.required_pass, + disabled_pass=curr_ctxt.disabled_pass, + instruments=curr_ctxt.instruments, + config={**dict(curr_ctxt.config), "tir.add_lower_pass": self._tir_passes}, ): - mod = tvm.lower(schedule.mod) - prim_func = mod["main"] + mod = tvm.lower(tvm.ir.IRModule.from_expr(prim_func)) + prim_func = mod[prim_func.attrs["global_symbol"]] return prim_func def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: @@ -104,10 +110,7 @@ def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: """ prim_func = self._lower_relay_to_tir(ext_func) - schedule = tir.Schedule(prim_func) - for sch_func in self._tir_schedules: - schedule = sch_func(schedule) - prim_func = self._lower_stir_to_nstir(schedule) + prim_func = self._lower_stir_to_nstir(prim_func) return prim_func def register(self) -> None: diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 61bb8043842f..bce86020614c 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -72,11 +72,6 @@ def _register_operator_strategy( ) -> None: self._relay_to_tir._operator_strategies.append((op, strat, plevel)) - def _register_tir_schedule( - self, sch_func: Callable[[tvm.tir.Schedule], tvm.tir.Schedule] - ) -> None: - self._relay_to_tir._tir_schedules.append(sch_func) - def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: self._relay_to_tir._tir_passes.append((stage, tir_pass)) @@ -90,7 +85,6 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: # Backend functions ############################################################################ def register(self) -> None: - registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") registration_func(self.target_name) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index f05d3adb4d0a..f96d1082960e 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -18,7 +18,6 @@ from ..backend import UMABackend from .strategies import * -from .schedules import * from .passes import * from .patterns import * from .codegen import * @@ -41,10 +40,7 @@ def __init__(self): ####################################################################### self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) - self._register_tir_schedule(insert_extern_calls) - - self._register_tir_pass(0, CodegenGenerateConfig()) - self._register_tir_pass(0, CodegenGenerateConstants()) + self._register_tir_pass(0, CodegenGenerateExternCalls()) 
####################################################################### # TIR to runtime function registration diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py index 95a4e26ebe76..c6f5bba4a365 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py @@ -17,7 +17,8 @@ """Transform passes for the UltraTrail accelerator""" import tvm -from tvm import relay +from tvm import relay, tir +from tvm.topi.utils import prod from collections import OrderedDict @@ -165,18 +166,30 @@ def var_scope_map(self, func): return mod +def insert_extern_calls(sch): + def extern_calls(): + calls = [] + buffer_scopes = list(sch.mod["main"].attrs["relay_attrs"]["ut_buffer_scopes"]) + buffer_scopes.reverse() # for some reason TIR params are reversed to relay function + for i, buffer_scope in enumerate(buffer_scopes): + buffer = sch.mod["main"].buffer_map[sch.mod["main"].params[i]] + size = prod(buffer.shape) + var = buffer.data + call = tir.call_extern("int32", f"load_{buffer_scope}", var, size) + calls.append(tir.Evaluate(call)) + seq = tir.stmt_seq(*calls) + return tir.Block([], [], [], "call_extern", seq) + + root_sref = sch.get_sref(sch.get_block("root")) + sch.state.replace(root_sref, extern_calls()) + + return sch @tvm.tir.transform.prim_func_pass(opt_level=1) -class CodegenGenerateConfig: +class CodegenGenerateExternCalls: def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: - return func - - -@tvm.tir.transform.prim_func_pass(opt_level=1) -class CodegenGenerateConstants: - def transform_function( - self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - return func + sch = tir.Schedule(func) + sch = insert_extern_calls(sch) + return sch.mod["main"] diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py deleted file mode 100644 index 4b2fd4ebd0a2..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/schedules.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""TIR schedule functions for the UltraTrail accelerator""" - -from tvm.topi.utils import prod -from tvm import tir - -# create one load buffer extern_call for each buffer_var (input/weights) -# - dont reset counter, only for first -# - packed buffers, correct layout, take care of missalignment at the end (software?,hardware?) 
-# create one load buffer for config -def insert_extern_calls(sch): - def extern_calls(): - calls = [] - buffer_scopes = list(sch.mod["main"].attrs["relay_attrs"]["ut_buffer_scopes"]) - buffer_scopes.reverse() # for some reason TIR params are reversed to relay function - for i, buffer_scope in enumerate(buffer_scopes): - buffer = sch.mod["main"].buffer_map[sch.mod["main"].params[i]] - size = prod(buffer.shape) - var = buffer.data - call = tir.call_extern("int32", f"load_{buffer_scope}", var, size) - calls.append(tir.Evaluate(call)) - seq = tir.stmt_seq(*calls) - return tir.Block([], [], [], "call_extern", seq) - - root_sref = sch.get_sref(sch.get_block("root")) - sch.state.replace(root_sref, extern_calls()) - - return sch From 93f7d94f24290f70754734a7ecbeca191c770c07 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 25 Mar 2022 14:31:10 +0100 Subject: [PATCH 025/112] Remove extract constants since non-scalar constants are now supported by TVM --- .../relay/backend/contrib/uma/api/lower.py | 4 -- .../relay/backend/contrib/uma/api/utils.py | 61 ------------------- 2 files changed, 65 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 5693dd8b2210..50b303e270ea 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -21,8 +21,6 @@ from typing import List, Tuple, Callable, Optional -from .utils import extract_constants - class UMALower(object): def __init__(self, target_name: str) -> None: @@ -54,14 +52,12 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. """ - relay_prim_func, constants = extract_constants(relay_prim_func) f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - tir_prim_func = tir_prim_func.with_attr("constants", constants) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index d8b056fd7061..9080b502d56f 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -15,64 +15,3 @@ # specific language governing permissions and limitations # under the License. """Utility methods for the Universal Modular Accelerator Interface (UMA)""" - -import tvm -from tvm import relay -from tvm.relay.expr_functor import ExprMutator - -from enum import Enum - - -############################## -# Extract constants workaround -############################## -class ExtractConstants(ExprMutator): - """The actual mutator pass to extract the constants from a function and replace them with - Vars so the function can be lowered to a TE graph. 
Additionally returns all the values of - the constants extracted.""" - - def __init__(self): - super().__init__() - self.constants = {} - self.const_vars = [] - - def visit_constant(self, const): - if isinstance(const.checked_type, relay.ty.TensorType): - name = "p" + str(len(self.constants)) - self.constants[name] = const.data - var = relay.var(type_annotation=const.checked_type, name_hint=name) - self.const_vars.append(var) - return var - - return const - - def visit_function(self, fn): - new_body = self.visit(fn.body) - new_params = list(fn.params) + self.const_vars - return relay.Function(new_params, new_body, attrs=fn.attrs) - - def extract_constants(self, func): - new_func = self.visit(func) - return new_func, self.constants - - -def extract_constants(func): - """Extract the constants from a function and replace them with - Vars so the function can be lowered to a TE graph. Additionally - returns all the values of the constants extracted. - Parameters - ---------- - func : tvm.relay.Function - The Relay function from which to extract constants. - Returns - ------- - new_func : tvm.relay.Function - The Relay function with constants replaced by vars. - const_dict : dict of int to numpy.ndarray - A dict of the extracted constants keyed by their param index. - """ - new_func, consts = ExtractConstants().extract_constants(func) - new_func = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(new_func))[ - func.attrs["global_symbol"] - ] - return new_func, consts From 008070d323ad03b57a8707607e95f49dbbfd2291 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 25 Mar 2022 17:47:13 +0100 Subject: [PATCH 026/112] API documentation and some code fixes and cleanup --- .../relay/backend/contrib/uma/api/lower.py | 3 + .../backend/contrib/uma/api/partitioner.py | 11 +- .../tvm/relay/backend/contrib/uma/backend.py | 187 ++++++++++++++++-- 3 files changed, 179 insertions(+), 22 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 50b303e270ea..2570f9daee61 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -18,6 +18,7 @@ import tvm from tvm import relay, te +from tvm.relay.op.op import register_strategy from typing import List, Tuple, Callable, Optional @@ -113,3 +114,5 @@ def register(self) -> None: tvm._ffi.register_func( "relay.ext.uma.relay_to_tir_func_{}".format(self.target_name), self.relay_to_tir_func ) + for op, strategy, plevel in self._operator_strategies: + register_strategy(op, strategy, plevel) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 0c7640a77990..59eb531138b2 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -26,21 +26,16 @@ class UMAPartitioner(object): def __init__( - self, target_name: str, variant: str = "", merge_compiler_regions: bool = True + self, target_name: str, merge_compiler_regions: bool = True ) -> None: self.target_name = target_name - self.variant = variant self.merge_compiler_regions = merge_compiler_regions self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] - self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, List[str]]] = [] + self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern]] = [] def _pattern_table(self): - return [ - (self.target_name + "." 
+ pattern[0], pattern[1]) - for pattern in self._patterns - if self.variant in pattern[2] or not pattern[2] - ] + return [(self.target_name + "." + pattern[0], pattern[1]) for pattern in self._patterns] def register(self) -> None: register_pattern_table(self.target_name, self._pattern_table) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index bce86020614c..5f5d5c6bdfa4 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -28,7 +28,10 @@ class UMABackend(object): def __init__(self, variant: str = "", merge_compiler_region: bool = True) -> None: - self._relay_to_relay = UMAPartitioner(self.target_name, variant, merge_compiler_region) + # TODO: variant implementation + # - variant should allow the user to differentiate between different variants of the same NPU + # - we need to decide where we want to make the variant decision and which parts of UMA are affected by it + self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_region) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) @@ -47,16 +50,70 @@ def target_name(self) -> str: ############################################################################ # Relay to Relay function registration ############################################################################ - def _register_relay_pass(self, stage: int, relay_pass: tvm.transform.Pass) -> None: - self._relay_to_relay._relay_passes.append((stage, relay_pass)) + def _register_relay_pass(self, phase: int, relay_pass: tvm.transform.Pass) -> None: + """Registers a relay pass at the given phase in the lowering process. - def _register_pattern( - self, - name: str, - pattern: tvm.relay.dataflow_pattern.DFPattern, - variants: Optional[List[str]] = None, - ) -> None: - self._relay_to_relay._patterns.append((name, pattern, [] if variants is None else variants)) + Parameters + ---------- + phase: int + The phase at which the pass is registered. + + relay_pass: tvm.transform.Pass + The relay pass to be registered. + + Example + ------- + Here is an example of how two relay passes are registered. + Passes of the same phase are executed in the order they are registered. + + .. code-block:: python + + self._register_relay_pass(0, MyPassA) + self._register_relay_pass(0, MyPassB) + + Where a relay pass can look like this: + + .. code-block:: python + + @tvm.ir.transform.module_pass(opt_level=0) + class MyPassA: + def transform_module(self, mod, ctx): + # My pass functionality... + return mod + """ + self._relay_to_relay._relay_passes.append((phase, relay_pass)) + + def _register_pattern(self, name: str, pattern: tvm.relay.dataflow_pattern.DFPattern,) -> None: + """Registers a dataflow pattern that is used to partition the relay graph. + + Parameters + ---------- + name: str + The name of the pattern. + + pattern: tvm.relay.dataflow_pattern.DFPattern + The dataflow pattern. + + Example + ------- + Here is an example of how two dataflow patterns are registered. + During partioning, patterns are searched in order of registration. + + .. code-block:: python + + self._register_pattern("conv1d", conv1d_pattern) + self._register_pattern("conv2d", conv2d_pattern) + + Where a dataflow pattern can look like this: + + .. 
code-block:: python + + conv1d_pattern = is_op("nn.conv1d")(wildcard(), wildcard()) + optional_bias = lambda x: is_op("nn.bias_add")(x, wildcard()) + optional_relu = lambda x: is_op("nn.relu")(x) + conv1d_pattern = conv1d_pattern.optional(optional_bias).optional(optional_relu) + """ + self._relay_to_relay._patterns.append((name, pattern)) ############################################################################ # Relay to TIR function registration @@ -64,21 +121,123 @@ def _register_pattern( def _register_operator_strategy( self, op: str, - strat: Callable[ + strategy: Callable[ [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], tvm.relay.op.op.OpStrategy, ], plevel: Optional[int] = 11, ) -> None: - self._relay_to_tir._operator_strategies.append((op, strat, plevel)) + """Registers an operator strategy that is used to partition the relay graph. + + Parameters + ---------- + op: str + The name of the operator for which this strategy will be registered. + + strategy: Callable[[tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], tvm.relay.op.op.OpStrategy] + The strategy function. + + plevel: Optional[int] = 11 + The priority level of the strategy. Higher plevel equals higher priorization. + The TVM default for topi strategies is 10 so by default new UMA strategies are always used. + + Example + ------- + Here is an example of how two operator strategies are registered. + + .. code-block:: python - def _register_tir_pass(self, stage: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: - self._relay_to_tir._tir_passes.append((stage, tir_pass)) + self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy) + self._register_operator_strategy("nn.conv2d", custom_conv2d_strategy) + + Where a strategy function can look like this: + + .. code-block:: python + + @relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") + def custom_conv1d_strategy(attrs, inputs, out_type, target): + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_conv1d(custom_conv1d_compute), + wrap_topi_schedule(custom_conv1d_schedule), + name="custom_conv1d.generic", + return strategy + """ + self._relay_to_tir._operator_strategies.append((op, strategy, plevel)) + + def _register_tir_pass(self, phase: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: + """Registers a TIR pass at the given phase in the lowering process. + + Parameters + ---------- + phase: int + The phase at which the pass is registered. + + tir_pass: tvm.tir.transform.PrimFuncPass + The TIR pass to be registered. + + Example + ------- + Here is an example of how two TIR passes are registered. + Passes of the same phase are executed in the order they are registered. + + .. code-block:: python + + self._register_tir_pass(0, MyPassA) + self._register_tir_pass(0, MyPassB) + + Where a TIR pass can look like this: + + .. code-block:: python + + @tvm.tir.transform.prim_func_pass(opt_level=0) + class MyPassA: + def transform_function(self, func, mod, ctx): + # My pass functionality... + return func + """ + self._relay_to_tir._tir_passes.append((phase, tir_pass)) ############################################################################ # TIR to runtime function registration ############################################################################ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: + """Registers a codegen which is used in place of the default C-codegen. + + Parameters + ---------- + fmt: str + The codegen format. 
For now, only C-codegen is supported by UMA. + + **kwargs + Keyword arguments for the chosen codegen. + + Example + ------- + Here is an example of how the custom C-codegen is registered and configured. + Passes of the same phase are executed in the order they are registered. + + .. code-block:: python + + self._register_codegen( + fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern + ) + + The C-codegen provides two hooks which allows the user to insert code through the python API. + - `includes` hooks into the include stream and allows insertion of custom includes. + - `replace_call_extern` hooks into the expression visitor and allows the user to insert custom code for a given extern call. + + The code generation functions can look like this: + + .. code-block:: python + + def gen_includes() -> str: + includes = "#include \n" + return includes + + def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: + return "my_custom_api_function({}, {}, {})".format(*args) + """ self._tir_to_runtime._register_codegen(fmt, **kwargs) ############################################################################ From 684ff7085b252b06e5c65917214cca0ba8add310 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 25 Mar 2022 19:39:02 +0100 Subject: [PATCH 027/112] Fix typo --- python/tvm/relay/backend/contrib/uma/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 5f5d5c6bdfa4..99266046298b 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -27,11 +27,11 @@ class UMABackend(object): - def __init__(self, variant: str = "", merge_compiler_region: bool = True) -> None: + def __init__(self, variant: str = "", merge_compiler_regions: bool = True) -> None: # TODO: variant implementation # - variant should allow the user to differentiate between different variants of the same NPU # - we need to decide where we want to make the variant decision and which parts of UMA are affected by it - self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_region) + self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_regions) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) From aabb90d4f0cc2f0d6f94719a6725056f26ffc290 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Thu, 12 May 2022 14:11:32 +0200 Subject: [PATCH 028/112] Fix UMA lowering --- .../relay/backend/contrib/uma/api/_ffi_api.py | 20 +++ .../relay/backend/contrib/uma/api/lower.py | 32 +++-- src/relay/backend/contrib/uma/relay_to_tir.cc | 127 ++++++++++++++---- .../backend/contrib/uma/tir_to_runtime.cc | 4 +- 4 files changed, 139 insertions(+), 44 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/api/_ffi_api.py diff --git a/python/tvm/relay/backend/contrib/uma/api/_ffi_api.py b/python/tvm/relay/backend/contrib/uma/api/_ffi_api.py new file mode 100644 index 000000000000..5f67cb7ec246 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/api/_ffi_api.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""FFI APIs for relay transformation passes.""" +import tvm._ffi # type: ignore + +tvm._ffi._init_api("relay.ext.uma", __name__) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 2570f9daee61..369a7ddafe31 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -22,6 +22,8 @@ from typing import List, Tuple, Callable, Optional +from . import _ffi_api + class UMALower(object): def __init__(self, target_name: str) -> None: @@ -59,6 +61,7 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) + tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target(self.target_name)) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func @@ -90,29 +93,30 @@ def _lower_stir_to_nstir(self, prim_func: tvm.tir.PrimFunc) -> tvm.tir.PrimFunc: prim_func = mod[prim_func.attrs["global_symbol"]] return prim_func - def relay_to_tir_func(self, ext_func: relay.Function) -> tvm.tir.PrimFunc: + def relay_to_tir(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: """ - This is the hook for python-based lowering of relay function - that gets offloaded to the target NPU. + This is the hook for python-based lowering of a Relay module which lowers NPU + external functions to TIR. Parameters ---------- - ext_func : relay.Function - The partitioned relay function. + mod : tvm.ir.IRModule + This is the Relay module. Returns ------- - prim_func : tir.PrimFunc - The scheduled PrimFunc. - + mod : tvm.ir.IRModule + The Relay module with scheduled NPU external functions. """ - prim_func = self._lower_relay_to_tir(ext_func) - prim_func = self._lower_stir_to_nstir(prim_func) - return prim_func + mod = _ffi_api.OutlineCompilerFunctions(self.target_name)(mod) + for gv, func in mod.functions.items(): + if "Compiler" in func.attrs and func.attrs["Compiler"] == self.target_name: + func = self._lower_relay_to_tir(func) + func = self._lower_stir_to_nstir(func) + mod.update_func(gv, func) + return mod def register(self) -> None: - tvm._ffi.register_func( - "relay.ext.uma.relay_to_tir_func_{}".format(self.target_name), self.relay_to_tir_func - ) + tvm._ffi.register_func(f"relay.ext.uma.{self.target_name}.relay_to_tir", self.relay_to_tir) for op, strategy, plevel in self._operator_strategies: register_strategy(op, strategy, plevel) diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index 55b17ccb0fea..10456ca10539 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -43,57 +43,126 @@ namespace contrib { namespace uma { /*! 
- * \brief This mutator lowers each external - * relay function to a TIR PrimFunc + * \brief This mutator outlines functions that are marked with a named + * "Compiler" attribute. Functions that do not match this condition remain + * unaltered. */ -class RelayToTIRMutator : public MixedModeMutator { +class OutlineCompilerFunctionsMutator : public MixedModeMutator { public: - explicit RelayToTIRMutator(IRModule ir_module, String target_name) - : ir_module_(ir_module), - target_name_(target_name) {} + explicit OutlineCompilerFunctionsMutator(const IRModule& mod, const std::string& compiler_name) + : mod_(mod), compiler_name_(compiler_name) {} - IRModule operator()() { - GlobalVar main_global_var = ir_module_->GetGlobalVar("main"); - Function main_func = Downcast(ir_module_->Lookup(main_global_var)); + Expr VisitExpr_(const LetNode* op) final { + auto pre_visit = [this](const LetNode* op) { + Expr var = this->VisitExpr(op->var); + Expr value = this->VisitExpr(op->value); - // Copy everything across and mutate the body - Function mutated_main = WithFields(main_func, main_func->params, VisitExpr(main_func->body)); - - ir_module_->Update(main_global_var, mutated_main); + // Outlineable function no longer needs let binding + if (this->CanOutlineExpr(value)) { + this->memo_[var] = value; + } + }; + auto post_visit = [this](const LetNode* op) { + // Rely on the Memoizer to cache pre-visit values + Expr value = this->VisitExpr(op->value); + Expr body = this->VisitExpr(op->body); + auto expr = GetRef(op); - return ir_module_; + // Drop the let binding + if (this->CanOutlineExpr(value)) { + this->memo_[expr] = this->VisitExpr(op->body); + } else { + Var var = Downcast(this->VisitExpr(op->var)); + if (var.same_as(op->var) && value.same_as(op->value) && body.same_as(op->body)) { + this->memo_[expr] = expr; + } else { + this->memo_[expr] = Let(var, value, body); + } + } + }; + ExpandANormalForm(op, pre_visit, post_visit); + return memo_[GetRef(op)]; } Expr Rewrite_(const CallNode* pre, const Expr& post) override { Call call = Downcast(post); - if (call->op->IsInstance()) { + if (CanOutlineExpr(call->op)) { Function func = Downcast(call->op); - auto codegen_name = func->GetAttr(attr::kCompiler); - if (codegen_name.defined() && codegen_name == target_name_) { - auto relay_to_tir_func_pf = - tvm::runtime::Registry::Get("relay.ext.uma.relay_to_tir_func_" + target_name_); - ICHECK(relay_to_tir_func_pf); - tir::PrimFunc prim_func = (*relay_to_tir_func_pf)(func); - prim_func = WithAttr(prim_func, tvm::attr::kTarget, Target(target_name_)); - String symbol_name = prim_func->GetAttr(tvm::attr::kGlobalSymbol).value(); - GlobalVar gv(symbol_name); + auto gv_name = func->GetAttr("global_symbol").value_or(""); + ICHECK_NE(gv_name, "") + << "Function to be outlined must have global_symbol attribute, but didn't."; + GlobalVar gv(gv_name); + if (func->checked_type_.defined()) { gv->checked_type_ = func->checked_type(); - ir_module_->Update(gv, prim_func); - return Call(gv, call->args, call->attrs, call->type_args); } + mod_->Update(gv, func); + return Call(gv, call->args, call->attrs, call->type_args); } return post; } private: - IRModule ir_module_; - String target_name_; + /*! + * \brief Check if the expr is a function and has the same + * compiler name as compiler_name_. + * + * \param expr The input expr. + * \return True if is outlineable else False. 
+ */ + bool CanOutlineExpr(const Expr& expr) { + if (!expr->IsInstance()) { + return false; + } + Function func = Downcast(expr); + auto compiler = func->GetAttr(attr::kCompiler); + if (!compiler.defined()) { + return false; + } + if (compiler != compiler_name_) { + return false; + } + return true; + } + + /*! \brief The module that the pass will run on. */ + IRModule mod_; + /*! \brief The name of the compiler to enable outlining on external functions for. */ + std::string compiler_name_; }; +/*! + * \brief A pass to outline compiler specific functions. + */ +tvm::transform::Pass OutlineCompilerFunctions(const std::string& compiler_name) { + runtime::TypedPackedFunc pass_func = + [=](IRModule mod, transform::PassContext ctx) { + GlobalVar gv = mod->GetGlobalVar("main"); + Function main_func = Downcast(mod->Lookup("main")); + auto new_main_body = + OutlineCompilerFunctionsMutator(mod, compiler_name).VisitExpr(main_func->body); + if (!new_main_body.same_as(main_func->body)) { + Function new_main_func = WithFields(main_func, main_func->params, new_main_body); + mod->Update(gv, new_main_func); + } + return mod; + }; + return tvm::transform::CreateModulePass( + pass_func, 0, "relay.backend.contrib.uma.OutlineCompilerFunctions", {}); +} + +TVM_REGISTER_GLOBAL("relay.ext.uma.OutlineCompilerFunctions") + .set_body_typed(OutlineCompilerFunctions); + +/*! + * \brief This pass will lower NPU functions in a Relay module to scheduled TIR prim functions. + */ tvm::transform::Pass RelayToTIR(String target_name) { runtime::TypedPackedFunc pass_func = [=](IRModule ir_module, transform::PassContext pass_context) { - return RelayToTIRMutator(ir_module, target_name)(); + auto relay_to_tir_pf = tvm::runtime::Registry::Get("relay.ext.uma." + target_name + ".relay_to_tir"); + ICHECK(relay_to_tir_pf); + ir_module = (*relay_to_tir_pf)(ir_module); + return ir_module; }; return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.uma.RelayToTIR", {}); } diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index b578b98a20ae..927bfe5397c8 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -42,7 +42,9 @@ class UMACodegen : public codegen::CodeGenCHost { ICHECK(includes_pf); String includes = (*includes_pf)(); decl_stream << includes; - CodeGenCHost::Init(output_ssa, emit_asserts, target_str_); + std::unordered_set devices; + devices.insert(target_str_); + CodeGenCHost::Init(output_ssa, emit_asserts, target_str_, devices); } /*! 
From 8092a448ff1f911c9237e9979faaee0897b36b6e Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Fri, 13 May 2022 19:02:02 +0200 Subject: [PATCH 029/112] Prototype for UMA-based target attribute registration --- python/tvm/relay/backend/contrib/uma/backend.py | 10 +++++++++- .../relay/backend/contrib/uma/ultra_trail/backend.py | 5 +++++ src/relay/backend/contrib/uma/targets.cc | 8 ++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 99266046298b..ad5dd914a2fb 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -31,6 +31,7 @@ def __init__(self, variant: str = "", merge_compiler_regions: bool = True) -> No # TODO: variant implementation # - variant should allow the user to differentiate between different variants of the same NPU # - we need to decide where we want to make the variant decision and which parts of UMA are affected by it + self._target_attrs: List = list() self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_regions) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) @@ -47,6 +48,13 @@ def target_name(self) -> str: """ ... + ############################################################################ + # Target configuration + ############################################################################ + def _register_target_attr(self, name: str) -> None: + """Register a target attribute name that can be used during target instantiation.""" + self._target_attrs.append(name) + ############################################################################ # Relay to Relay function registration ############################################################################ @@ -245,7 +253,7 @@ def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: ############################################################################ def register(self) -> None: registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") - registration_func(self.target_name) + registration_func(self.target_name, self._target_attrs) self._relay_to_relay.register() self._relay_to_tir.register() diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index f96d1082960e..c53877b0e68d 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -27,6 +27,11 @@ class UltraTrailBackend(UMABackend): def __init__(self): super(UltraTrailBackend, self).__init__() + ####################################################################### + # Target configuration + ####################################################################### + self._register_target_attr("dimension") + ####################################################################### # Relay to Relay function registration ####################################################################### diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 812943f9fa94..3fd384df8b18 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -38,8 +38,8 @@ namespace uma { } // namespace relay TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") - .set_body_typed([](String target_name){ - 
::tvm::TargetKindRegEntry::RegisterOrGet(target_name) + .set_body_typed([](String target_name, Array attr_names){ + auto target_kind = ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) .set_name() .set_device_type(kDLCPU) .add_attr_option>("keys") @@ -51,6 +51,10 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") .add_attr_option("from_device") .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); + + for (auto &attr_name : attr_names) { + target_kind.add_attr_option(attr_name); + } }); } // namespace tvm From 5ca3c736cf7e5c2d33598a9e63ba04841693954f Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Mon, 16 May 2022 15:14:05 +0200 Subject: [PATCH 030/112] Add default option and type deduction to register_target_attr --- .../relay/backend/contrib/uma/api/lower.py | 1 + .../tvm/relay/backend/contrib/uma/backend.py | 10 ++++++---- src/relay/backend/contrib/uma/targets.cc | 20 ++++++++++++++++--- .../backend/contrib/uma/tir_to_runtime.cc | 2 +- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 369a7ddafe31..4b64c53c5d83 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -61,6 +61,7 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) + # TODO: The target should probably come from somewhere else instead of being created here. tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target(self.target_name)) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index ad5dd914a2fb..6b2b4bee5c15 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -19,7 +19,7 @@ import tvm from abc import abstractmethod -from typing import List, Dict, Callable, Optional +from typing import Union, Dict, Callable, Optional from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner from tvm.relay.backend.contrib.uma.api.lower import UMALower @@ -31,7 +31,7 @@ def __init__(self, variant: str = "", merge_compiler_regions: bool = True) -> No # TODO: variant implementation # - variant should allow the user to differentiate between different variants of the same NPU # - we need to decide where we want to make the variant decision and which parts of UMA are affected by it - self._target_attrs: List = list() + self._target_attrs: Dict = dict() self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_regions) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) @@ -51,9 +51,11 @@ def target_name(self) -> str: ############################################################################ # Target configuration ############################################################################ - def _register_target_attr(self, name: str) -> None: + def _register_target_attr( + self, name: str, default: Optional[Union[str, int, bool]] = "", + ) -> None: """Register a target attribute name that can be used during target instantiation.""" - self._target_attrs.append(name) + self._target_attrs[name] = default 
############################################################################ # Relay to Relay function registration diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 3fd384df8b18..6693df5b4ce3 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -38,7 +38,7 @@ namespace uma { } // namespace relay TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") - .set_body_typed([](String target_name, Array attr_names){ + .set_body_typed([](String target_name, Map attr_options){ auto target_kind = ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) .set_name() .set_device_type(kDLCPU) @@ -52,8 +52,22 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); - for (auto &attr_name : attr_names) { - target_kind.add_attr_option(attr_name); + for (auto &attr_option : attr_options) { + try { + target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); + continue; + } catch (...) {} + try { + target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); + continue; + } catch (...) {} + try { + target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); + continue; + } catch (...) { + LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() + << " can not be added. Only String, Integer, or Bool are supported."; + } } }); diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index 927bfe5397c8..e34bb1e90207 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -82,7 +82,7 @@ class UMACodegen : public codegen::CodeGenCHost { runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; bool emit_asserts = false; - UMACodegen codegen (target->str()); + UMACodegen codegen (target->kind->name); Array function_names; codegen.Init(output_ssa, emit_asserts); for (auto kv : mod->functions) { From 52f1fdb2823a1265adc5a35235c76856b5e1f591 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Tue, 31 May 2022 14:34:44 +0200 Subject: [PATCH 031/112] Change pass phases to enum --- .../relay/backend/contrib/uma/api/lower.py | 23 ++++-- .../backend/contrib/uma/api/partitioner.py | 26 ++++--- .../relay/backend/contrib/uma/api/utils.py | 14 ++++ .../tvm/relay/backend/contrib/uma/backend.py | 75 ++++++++++++------- .../contrib/uma/ultra_trail/backend.py | 20 +++-- 5 files changed, 108 insertions(+), 50 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 4b64c53c5d83..d1afa8879c30 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -16,16 +16,18 @@ # under the License. """Lowering base class of the Universal Modular Accelerator Interface (UMA)""" +from typing import List, Tuple, Callable, Optional + import tvm +from . import _ffi_api from tvm import relay, te from tvm.relay.op.op import register_strategy +from .utils import PassPhase -from typing import List, Tuple, Callable, Optional - -from . 
import _ffi_api +class UMALower: + """Lowering base class of the Universal Modular Accelerator Interface (UMA).""" -class UMALower(object): def __init__(self, target_name: str) -> None: self.target_name = target_name @@ -39,7 +41,7 @@ def __init__(self, target_name: str) -> None: Optional[int], ] ] = [] - self._tir_passes: List[Tuple[int, tvm.tir.transform.PrimFuncPass]] = [] + self._tir_passes: List[Tuple[PassPhase, tvm.tir.transform.PrimFuncPass]] = [] def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: """Lower a Relay primitive function to a S-TIR primitive function. @@ -83,12 +85,20 @@ def _lower_stir_to_nstir(self, prim_func: tvm.tir.PrimFunc) -> tvm.tir.PrimFunc: curr_ctxt = tvm.transform.PassContext().current() assert "tir.add_lower_pass" not in curr_ctxt.config + pass_map = { + PassPhase.TIR_PHASE_0: 0, + PassPhase.TIR_PHASE_1: 1, + PassPhase.TIR_PHASE_2: 2, + PassPhase.TIR_PHASE_3: 3, + } + lower_passes = [(pass_map[k], v) for k, v in self._tir_passes] + with tvm.transform.PassContext( opt_level=curr_ctxt.opt_level, required_pass=curr_ctxt.required_pass, disabled_pass=curr_ctxt.disabled_pass, instruments=curr_ctxt.instruments, - config={**dict(curr_ctxt.config), "tir.add_lower_pass": self._tir_passes}, + config={**dict(curr_ctxt.config), "tir.add_lower_pass": lower_passes}, ): mod = tvm.lower(tvm.ir.IRModule.from_expr(prim_func)) prim_func = mod[prim_func.attrs["global_symbol"]] @@ -118,6 +128,7 @@ def relay_to_tir(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: return mod def register(self) -> None: + """Register all relevant relay-to-tir functions.""" tvm._ffi.register_func(f"relay.ext.uma.{self.target_name}.relay_to_tir", self.relay_to_tir) for op, strategy, plevel in self._operator_strategies: register_strategy(op, strategy, plevel) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 59eb531138b2..40014756207b 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -16,28 +16,30 @@ # under the License. """Partitioner base class of the Universal Modular Accelerator Interface (UMA)""" +from typing import Dict, List, Tuple, Optional + import tvm from tvm import relay from tvm.relay.build_module import bind_params_by_name from tvm.relay.op.contrib.register import register_pattern_table +from .utils import PassPhase -from typing import Dict, List, Tuple, Optional +class UMAPartitioner(): + """Partitioner base class of the Universal Modular Accelerator Interface (UMA).""" -class UMAPartitioner(object): - def __init__( - self, target_name: str, merge_compiler_regions: bool = True - ) -> None: + def __init__(self, target_name: str, merge_compiler_regions: bool = True) -> None: self.target_name = target_name self.merge_compiler_regions = merge_compiler_regions - self._relay_passes: List[Tuple[int, tvm.transform.Pass]] = [] + self._relay_passes: List[Tuple[PassPhase, tvm.transform.Pass]] = [] self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern]] = [] def _pattern_table(self): return [(self.target_name + "." 
+ pattern[0], pattern[1]) for pattern in self._patterns] def register(self) -> None: + """Register all relevant relay-to-relay functions.""" register_pattern_table(self.target_name, self._pattern_table) def partition( @@ -60,7 +62,9 @@ def partition( mod["main"] = bind_params_by_name(mod["main"], params) mod = relay.transform.InferType()(mod) - mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 0])(mod) + mod = tvm.transform.Sequential( + [p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING] + )(mod) mod = relay.transform.MergeComposite(self._pattern_table())(mod) mod = relay.transform.AnnotateTarget(self.target_name)(mod) if self.merge_compiler_regions: @@ -68,11 +72,15 @@ def partition( mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) mod = relay.transform.InferType()(mod) - mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 1])(mod) + mod = tvm.transform.Sequential( + [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0] + )(mod) mod = relay.transform.InferType()(mod) # Defunctionalize the partitioned functions to allow lowering for gv, func in mod.functions.items(): mod.update_func(gv, relay.transform.Defunctionalization(func, mod)) - mod = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == 2])(mod) + mod = tvm.transform.Sequential( + [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1] + )(mod) return mod diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index 9080b502d56f..f0d7de1dec87 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -15,3 +15,17 @@ # specific language governing permissions and limitations # under the License. """Utility methods for the Universal Modular Accelerator Interface (UMA)""" + +from enum import Enum, auto + +# TODO: naming +class PassPhase(Enum): + """UMA pass phases.""" + + PRE_PARTITIONING = auto() + POST_PARTITIONING_0 = auto() + POST_PARTITIONING_1 = auto() + TIR_PHASE_0 = auto() + TIR_PHASE_1 = auto() + TIR_PHASE_2 = auto() + TIR_PHASE_3 = auto() diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 6b2b4bee5c15..7079624a6633 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -16,22 +16,20 @@ # under the License. 
"""Backend base class of the Universal Modular Accelerator Interface (UMA)""" -import tvm - -from abc import abstractmethod -from typing import Union, Dict, Callable, Optional +from abc import ABC, abstractmethod +from typing import Union, Dict, Callable, Optional, Any -from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner -from tvm.relay.backend.contrib.uma.api.lower import UMALower +import tvm from tvm.relay.backend.contrib.uma.api.codegen import UMACodegen +from tvm.relay.backend.contrib.uma.api.lower import UMALower +from tvm.relay.backend.contrib.uma.api.partitioner import UMAPartitioner +from tvm.relay.backend.contrib.uma.api.utils import PassPhase -class UMABackend(object): - def __init__(self, variant: str = "", merge_compiler_regions: bool = True) -> None: - # TODO: variant implementation - # - variant should allow the user to differentiate between different variants of the same NPU - # - we need to decide where we want to make the variant decision and which parts of UMA are affected by it - self._target_attrs: Dict = dict() +class UMABackend(ABC): + def __init__(self, merge_compiler_regions: bool = True) -> None: + self._target_attrs: Dict = {} + self._target_preprocessor: Callable[[str], Dict[str, Any]] = None self._relay_to_relay = UMAPartitioner(self.target_name, merge_compiler_regions) self._relay_to_tir = UMALower(self.target_name) self._tir_to_runtime = UMACodegen(self.target_name) @@ -52,20 +50,40 @@ def target_name(self) -> str: # Target configuration ############################################################################ def _register_target_attr( - self, name: str, default: Optional[Union[str, int, bool]] = "", + self, + name: str, + default: Optional[Union[str, int, bool]] = "", ) -> None: - """Register a target attribute name that can be used during target instantiation.""" + """Register a target attribute name that can be used during target instantiation. + Parameters + ---------- + name: str + The name of the target attribute. + + default: Optional[Union[str, int, bool]] + A default value for the attribute. + If none is provided, the attribute will be treated as a string. + + Example + ------- + Here is an example of how two attribute options are registered. + + .. code-block:: python + + self._register_target_attr("attrA", default=0) + self._register_target_attr("attrB", default=False) + """ self._target_attrs[name] = default ############################################################################ # Relay to Relay function registration ############################################################################ - def _register_relay_pass(self, phase: int, relay_pass: tvm.transform.Pass) -> None: + def _register_relay_pass(self, phase: PassPhase, relay_pass: tvm.transform.Pass) -> None: """Registers a relay pass at the given phase in the lowering process. Parameters ---------- - phase: int + phase: PassPhase The phase at which the pass is registered. relay_pass: tvm.transform.Pass @@ -78,13 +96,13 @@ def _register_relay_pass(self, phase: int, relay_pass: tvm.transform.Pass) -> No .. code-block:: python - self._register_relay_pass(0, MyPassA) - self._register_relay_pass(0, MyPassB) + self._register_relay_pass(PassPhase.PRE_PARTITIONING, MyPassA) + self._register_relay_pass(PassPhase.POST_PARTITIONING, MyPassB) Where a relay pass can look like this: .. 
code-block:: python - + @tvm.ir.transform.module_pass(opt_level=0) class MyPassA: def transform_module(self, mod, ctx): @@ -93,7 +111,11 @@ def transform_module(self, mod, ctx): """ self._relay_to_relay._relay_passes.append((phase, relay_pass)) - def _register_pattern(self, name: str, pattern: tvm.relay.dataflow_pattern.DFPattern,) -> None: + def _register_pattern( + self, + name: str, + pattern: tvm.relay.dataflow_pattern.DFPattern, + ) -> None: """Registers a dataflow pattern that is used to partition the relay graph. Parameters @@ -117,7 +139,7 @@ def _register_pattern(self, name: str, pattern: tvm.relay.dataflow_pattern.DFPat Where a dataflow pattern can look like this: .. code-block:: python - + conv1d_pattern = is_op("nn.conv1d")(wildcard(), wildcard()) optional_bias = lambda x: is_op("nn.bias_add")(x, wildcard()) optional_relu = lambda x: is_op("nn.relu")(x) @@ -175,17 +197,16 @@ def custom_conv1d_strategy(attrs, inputs, out_type, target): """ self._relay_to_tir._operator_strategies.append((op, strategy, plevel)) - def _register_tir_pass(self, phase: int, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: + def _register_tir_pass(self, phase: PassPhase, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: """Registers a TIR pass at the given phase in the lowering process. Parameters ---------- - phase: int + phase: PassPhase The phase at which the pass is registered. tir_pass: tvm.tir.transform.PrimFuncPass The TIR pass to be registered. - Example ------- Here is an example of how two TIR passes are registered. @@ -193,13 +214,13 @@ def _register_tir_pass(self, phase: int, tir_pass: tvm.tir.transform.PrimFuncPas .. code-block:: python - self._register_tir_pass(0, MyPassA) - self._register_tir_pass(0, MyPassB) + self._register_tir_pass(PassPhase.TIR_PHASE_0, MyPassA) + self._register_tir_pass(PassPhase.TIR_PHASE_1, MyPassB) Where a TIR pass can look like this: .. code-block:: python - + @tvm.tir.transform.prim_func_pass(opt_level=0) class MyPassA: def transform_function(self, func, mod, ctx): diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index c53877b0e68d..dfa2b6627cc3 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -16,36 +16,40 @@ # under the License. 
"""UMA backend for the UltraTrail accelerator""" +from ..api.utils import PassPhase from ..backend import UMABackend -from .strategies import * -from .passes import * -from .patterns import * -from .codegen import * +from .codegen import gen_includes, gen_replace_call_extern +from .passes import ConfigGenerator, BufferScopeAnnotator, CodegenGenerateExternCalls +from .patterns import conv1d_relu_pattern +from .strategies import custom_conv1d_strategy class UltraTrailBackend(UMABackend): + """UMA backend for the UltraTrail accelerator.""" + def __init__(self): - super(UltraTrailBackend, self).__init__() + super().__init__() ####################################################################### # Target configuration ####################################################################### self._register_target_attr("dimension") + self._register_target_preprocessor(lambda x: {"attr": x}) ####################################################################### # Relay to Relay function registration ####################################################################### self._register_pattern("conv1d_relu", conv1d_relu_pattern()) - self._register_relay_pass(1, ConfigGenerator()) - self._register_relay_pass(2, BufferScopeAnnotator()) + self._register_relay_pass(PassPhase.POST_PARTITIONING_0, ConfigGenerator()) + self._register_relay_pass(PassPhase.POST_PARTITIONING_1, BufferScopeAnnotator()) ####################################################################### # Relay to TIR function registration ####################################################################### self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) - self._register_tir_pass(0, CodegenGenerateExternCalls()) + self._register_tir_pass(PassPhase.TIR_PHASE_0, CodegenGenerateExternCalls()) ####################################################################### # TIR to runtime function registration From d5ca776f282de0f08c9d0181d7ae0b5b68fefafc Mon Sep 17 00:00:00 2001 From: mbs-octoml Date: Mon, 23 May 2022 13:07:18 -0700 Subject: [PATCH 032/112] [Relay] Plumb external codegen target via Target.current() for all external codegen paths (See https://discuss.tvm.apache.org/t/byoc-supporting-cutlass-byoc-with-collage/12796/6 for context, which in turn is part of Collage (https://github.com/apache/tvm-rfcs/blob/main/rfcs/0062-collage.md). We want both old-style (via relay.ext.$toolchain) and new-style (via "RelayToTIR" Pass attribute on target kind) external codegen to be able to access the current 'external codegen' Target instance via Target.current(). - For old-style, plumb the true Target through TEComplier and push it on the context stack before calling relay.ext.$toolchain. - For new-style, pass the CompilationConfig to the RelayToTIRTargetHook pass, make the jump from "Compiler" attribute value to Target via the new CompilationConfig::FindPrimitiveTargetForKind method, and push on the stack before invoking the custom "RelayToTIR" pass. While working on this discovered RelayToTIRTargetHook was incompatible with the VM's compilation flow since RelayToTIRTargetHook assumes all "Compiler" attributed functions are inlined. Generalize it to support both inline and global function styles. Extend Target::IsExternalCodegen to recognize target kinds with "RelayToTIR" attributes as external. Update target hooks unit test to exercise new support for outline-style, picking up the current target, and compiling via the VM. 
--- include/tvm/relay/transform.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/tvm/relay/transform.h b/include/tvm/relay/transform.h index b37d0f83adf3..063928842a1b 100644 --- a/include/tvm/relay/transform.h +++ b/include/tvm/relay/transform.h @@ -509,6 +509,8 @@ TVM_DLL Pass SimplifyExpr(); * * \param config All available targets. * + * \param config All available targets. + * * \return The pass. */ TVM_DLL Pass RelayToTIRTargetHook(CompilationConfig config); From 8b299ad066428505538561625179477c7cd8d31f Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 1 Jun 2022 09:47:50 +0200 Subject: [PATCH 033/112] Use current target in lowering --- 3rdparty/dmlc-core | 2 +- python/tvm/relay/backend/contrib/uma/api/lower.py | 3 +-- python/tvm/relay/backend/contrib/uma/run.py | 5 +++-- python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py | 4 ++-- .../tvm/relay/backend/contrib/uma/ultra_trail/strategies.py | 5 +++++ python/tvm/topi/generic/injective.py | 4 ++-- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 09511cf9fe5f..21cc7de0dc9f 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 09511cf9fe5ff103900a5eafb50870dc84cc17c8 +Subproject commit 21cc7de0dc9fd6acb796e1be6181fa8e6b6c8f41 diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index d1afa8879c30..ce3b7c4921d3 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -63,8 +63,7 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - # TODO: The target should probably come from somewhere else instead of being created here. 
- tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target(self.target_name)) + tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target.current()) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py index 550e05ade818..fd584b6d6a20 100644 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -43,14 +43,15 @@ def main(): mod = ut_backend.partition(mod) # Relay build (AOT C target) - TARGET = tvm.target.Target("c") + TARGET = tvm.target.Target("ultra_trail", host=tvm.target.Target("c")) + GENERIC_TARGET= tvm.target.Target("c") RUNTIME = tvm.relay.backend.Runtime("crt") EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) with tvm.transform.PassContext( opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] ): - module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) + module = relay.build(mod, target=[GENERIC_TARGET, TARGET], runtime=RUNTIME, executor=EXECUTOR, params=params) model_library_format_tar_path = Path("build/lib.tar") model_library_format_tar_path.unlink(missing_ok=True) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index dfa2b6627cc3..4307ddd6de7e 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -34,7 +34,7 @@ def __init__(self): # Target configuration ####################################################################### self._register_target_attr("dimension") - self._register_target_preprocessor(lambda x: {"attr": x}) + #self._register_target_preprocessor(lambda x: {"attr": x}) ####################################################################### # Relay to Relay function registration @@ -47,7 +47,7 @@ def __init__(self): ####################################################################### # Relay to TIR function registration ####################################################################### - self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) + self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=11) self._register_tir_pass(PassPhase.TIR_PHASE_0, CodegenGenerateExternCalls()) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py index cd01a8c87132..745dd697b20d 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py @@ -89,6 +89,11 @@ def schedule_conv1d_ncw(outs): nn, kk, xx = s[outs[0]].op.axis rc, rx = s[outs[0]].op.reduce_axis + + print("==================") + print(outs) + print("==================") + # kk_outer, kk_inner = s[outs[0]].split(kk, 8) # xx_outer, xx_inner = s[outs[0]].split(xx, 1) diff --git a/python/tvm/topi/generic/injective.py b/python/tvm/topi/generic/injective.py index 6b8109897b99..01efe51b8033 100644 --- a/python/tvm/topi/generic/injective.py +++ b/python/tvm/topi/generic/injective.py @@ -56,8 +56,8 @@ def schedule_injective(outs): The computation schedule for the op. 
""" target = tvm.target.Target.current(allow_none=False) - if target.kind.name != "llvm": - raise RuntimeError("schedule_injective not registered for '%s'" % target) + #if target.kind.name != "llvm": + # raise RuntimeError("schedule_injective not registered for '%s'" % target) outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] s = te.create_schedule([x.op for x in outs]) From f147087b75c12c5df0901f7456ec8afc9379cc14 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 1 Jun 2022 10:32:10 +0200 Subject: [PATCH 034/112] Use attr:kRelayToTIR --- src/relay/backend/contrib/uma/targets.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 6693df5b4ce3..f0d562a0e9c2 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -49,7 +49,7 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") .add_attr_option>("libs") .add_attr_option("host") .add_attr_option("from_device") - .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) + .set_attr(tvm::attr::kRelayToTIR, relay::contrib::uma::RelayToTIR(target_name)) .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); for (auto &attr_option : attr_options) { From a1ec13d00c315a9dbe6033d7e1d00e0bc1da3218 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 1 Jun 2022 09:26:13 +0000 Subject: [PATCH 035/112] Remove erronousely commited quick fix --- python/tvm/topi/generic/injective.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/topi/generic/injective.py b/python/tvm/topi/generic/injective.py index 01efe51b8033..6b8109897b99 100644 --- a/python/tvm/topi/generic/injective.py +++ b/python/tvm/topi/generic/injective.py @@ -56,8 +56,8 @@ def schedule_injective(outs): The computation schedule for the op. 
""" target = tvm.target.Target.current(allow_none=False) - #if target.kind.name != "llvm": - # raise RuntimeError("schedule_injective not registered for '%s'" % target) + if target.kind.name != "llvm": + raise RuntimeError("schedule_injective not registered for '%s'" % target) outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] s = te.create_schedule([x.op for x in outs]) From 445b5384ccba6389cca7248f9d51da517cae124d Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 1 Jun 2022 12:00:06 +0200 Subject: [PATCH 036/112] Towards test cases for uma --- .../backend/contrib/uma/test_uma/test_uma.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py diff --git a/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py b/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py new file mode 100644 index 000000000000..e6b75f3b47e7 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py @@ -0,0 +1,74 @@ +import tvm +from tvm import relay +from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend + +import pytest +import torch +import tarfile +import tempfile +from pathlib import Path + + +class TorchModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv1d( + 16, 24, 9, bias=True, padding=4, stride=1, dilation=1, groups=1 + ) + self.relu = torch.nn.ReLU() + self.conv2 = torch.nn.Conv1d( + 24, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 + ) + self.relu2 = torch.nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.relu(x) + x = self.conv2(x) + x = self.relu2(x) + x = x + 42 + return x + + +# Target Registration +ut_backend = UltraTrailBackend() +ut_backend.register() + +@pytest.mark.parametrize( + "compound_target", + [ + [tvm.target.Target("c"), tvm.target.Target("ultra_trail", host=tvm.target.Target("c"))] + ] +) +def test_ultra_trail(compound_target): + torch_mod = TorchModel() + # Pytorch frontend + input_shape = (1, 16, 20) + dummy_input = torch.randn(input_shape) + scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() + mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) + + # Relay target specific partitioning + mod = ut_backend.partition(mod) + + # Relay build (AOT C target) + RUNTIME = tvm.relay.backend.Runtime("crt") + EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + + with tvm.transform.PassContext( + opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] + ): + module = relay.build(mod, target=compound_target, runtime=RUNTIME, executor=EXECUTOR, params=params) + + with tempfile.TemporaryDirectory() as build_dir: + build_dir = Path(build_dir) + model_library_format_tar_path = build_dir / "lib.tar" + model_library_format_tar_path.unlink(missing_ok=True) + model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) + + tvm.micro.export_model_library_format(module, model_library_format_tar_path) + + print("Built MLF Library: ") + with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: + print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) + tar_f.extractall(model_library_format_tar_path.parent) From 36f07ee4fddcfe787af8109abf59874a1699f827 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 1 Jun 2022 13:44:37 +0200 Subject: [PATCH 037/112] Add test_uma --- .../backend/contrib/uma/test_uma/test_uma.py | 18 +++++++++++++----- 1 file 
changed, 13 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py b/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py index e6b75f3b47e7..8c17021d9a1b 100644 --- a/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py +++ b/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py @@ -1,3 +1,5 @@ +# FIXME: move to proper test case directory + import tvm from tvm import relay from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend @@ -37,7 +39,8 @@ def forward(self, x): @pytest.mark.parametrize( "compound_target", [ - [tvm.target.Target("c"), tvm.target.Target("ultra_trail", host=tvm.target.Target("c"))] + [tvm.target.Target("llvm"), tvm.target.Target("ultra_trail", host=tvm.target.Target("llvm"))], + [tvm.target.Target("c"), tvm.target.Target("ultra_trail", host=tvm.target.Target("c"))], ] ) def test_ultra_trail(compound_target): @@ -48,12 +51,17 @@ def test_ultra_trail(compound_target): scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - # Relay target specific partitioning + # Relay target specific partitioning mod = ut_backend.partition(mod) + generic_target = compound_target[0] + # Relay build (AOT C target) - RUNTIME = tvm.relay.backend.Runtime("crt") - EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + RUNTIME = tvm.relay.backend.Runtime("crt", {"system-lib": True}) + if str(generic_target.kind) == "llvm": + EXECUTOR = tvm.relay.backend.Executor("graph", {"link-params": True}) + else: + EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) with tvm.transform.PassContext( opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] @@ -71,4 +79,4 @@ def test_ultra_trail(compound_target): print("Built MLF Library: ") with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) - tar_f.extractall(model_library_format_tar_path.parent) + tar_f.extractall(model_library_format_tar_path.parent) \ No newline at end of file From 73012aa570de7cef677aa787cbf049427ca16ac1 Mon Sep 17 00:00:00 2001 From: Paul Palomero Bernardo Date: Tue, 31 May 2022 20:30:30 +0200 Subject: [PATCH 038/112] Initial UMA structure for version 1 --- python/tvm/relay/backend/contrib/uma/api/lower.py | 3 ++- python/tvm/relay/backend/contrib/uma/run.py | 5 ++--- python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py | 3 +-- .../tvm/relay/backend/contrib/uma/ultra_trail/strategies.py | 5 ----- src/relay/backend/contrib/uma/targets.cc | 2 +- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index ce3b7c4921d3..d1afa8879c30 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -63,7 +63,8 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target.current()) + # TODO: The target should probably come from somewhere else instead of being created here. 
+ tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target(self.target_name)) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py index fd584b6d6a20..550e05ade818 100644 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -43,15 +43,14 @@ def main(): mod = ut_backend.partition(mod) # Relay build (AOT C target) - TARGET = tvm.target.Target("ultra_trail", host=tvm.target.Target("c")) - GENERIC_TARGET= tvm.target.Target("c") + TARGET = tvm.target.Target("c") RUNTIME = tvm.relay.backend.Runtime("crt") EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) with tvm.transform.PassContext( opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] ): - module = relay.build(mod, target=[GENERIC_TARGET, TARGET], runtime=RUNTIME, executor=EXECUTOR, params=params) + module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) model_library_format_tar_path = Path("build/lib.tar") model_library_format_tar_path.unlink(missing_ok=True) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py index 4307ddd6de7e..b53e39ccd6ae 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py @@ -34,7 +34,6 @@ def __init__(self): # Target configuration ####################################################################### self._register_target_attr("dimension") - #self._register_target_preprocessor(lambda x: {"attr": x}) ####################################################################### # Relay to Relay function registration @@ -47,7 +46,7 @@ def __init__(self): ####################################################################### # Relay to TIR function registration ####################################################################### - self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=11) + self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) self._register_tir_pass(PassPhase.TIR_PHASE_0, CodegenGenerateExternCalls()) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py index 745dd697b20d..cd01a8c87132 100644 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py +++ b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py @@ -89,11 +89,6 @@ def schedule_conv1d_ncw(outs): nn, kk, xx = s[outs[0]].op.axis rc, rx = s[outs[0]].op.reduce_axis - - print("==================") - print(outs) - print("==================") - # kk_outer, kk_inner = s[outs[0]].split(kk, 8) # xx_outer, xx_inner = s[outs[0]].split(xx, 1) diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index f0d562a0e9c2..6693df5b4ce3 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -49,7 +49,7 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") .add_attr_option>("libs") .add_attr_option("host") .add_attr_option("from_device") - .set_attr(tvm::attr::kRelayToTIR, relay::contrib::uma::RelayToTIR(target_name)) + .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); for 
(auto &attr_option : attr_options) { From cd8dcbb3fca423ad252d655143dc4908301c2057 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 23 Jun 2022 14:52:03 +0000 Subject: [PATCH 039/112] [UMA]: conv2d unit test --- .../backend/contrib/uma/_template/passes.py | 134 ++++++++++++++++++ tests/python/unittest/test_uma_passes.py | 69 +++++++++ 2 files changed, 203 insertions(+) create mode 100644 python/tvm/relay/backend/contrib/uma/_template/passes.py create mode 100644 tests/python/unittest/test_uma_passes.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py new file mode 100644 index 000000000000..e78d52e93d06 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Transform passes for the UltraTrail accelerator""" + +import tvm +from tvm import relay, tir +from tvm.topi.utils import prod + +from collections import OrderedDict + + +@tvm.tir.transform.prim_func_pass(opt_level=2) +def my_ai_hw_conv2d_pass(func, mod, ctx): + _found_blocks = [] + _loops = [] + _entry_node = None + _external_function_name = "my_hw_ai_conv2dnchw" + + def _has_block(self, name: str, func) -> bool: + """ + Determine of a tir.block with `name` exists in `func` + """ + def _hb(op): + if isinstance(op, tvm.tir.Block): + my_ai_hw_conv2d_pass._found_blocks.append(op.name_hint) + + my_ai_hw_conv2d_pass._found_blocks = [] + tvm.tir.stmt_functor.post_order_visit(func.body, _hb) + return name in my_ai_hw_conv2d_pass._found_blocks + + def transform_function2( + self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + def _replace_conv2d(op): + if op == my_ai_hw_conv2d_pass._entry_node: + irb = tvm.tir.ir_builder.create() + # Collection of buffer address + buffers = [b[1].data for b in my_ai_hw_conv2d_pass._handles] + # extraction of loop offsets + for i in my_ai_hw_conv2d_pass._loops: + assert i.min.value == 0 + offsets = [loop.extent.value for loop in my_ai_hw_conv2d_pass._loops] + args = buffers # + offsets + external_call = tvm.tir.Evaluate(tir_call(irb, True, "my_hw_ai_conv2dnchw", *args)) + mac_calls = tvm.tir.SeqStmt([external_call]) + irb.emit(mac_calls) + irb_result = irb.get() + return irb_result + return op + + sch = tir.Schedule(func) + + if self._has_block("conv2d_nchw", func): + conv2d_block = sch.get_block("conv2d_nchw") + + rv_loops = sch.get_loops(conv2d_block) + assert len(rv_loops) == 7 + n, co, h, w, ci, kh, hw = rv_loops + my_ai_hw_conv2d_pass._entry_node = sch.get(rv_loops[1]) + my_ai_hw_conv2d_pass._loops = [sch.get(i) for i in rv_loops] + my_ai_hw_conv2d_pass._handles = func.buffer_map.items() + + x = 
tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) + return func.with_body(x) + else: + return sch.mod["main"] + + def _transform_function( + func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + def _replace_conv2d(op): + if isinstance(op, tvm.tir.For) and op.loop_var.name == "yy": + irb = tvm.tir.ir_builder.create() + # Collection of buffer address + buffers = [b[1].data for b in func.buffer_map.items()] + args = buffers # + offsets + external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) + mac_calls = tvm.tir.SeqStmt([external_call]) + irb.emit(mac_calls) + irb_result = irb.get() + return irb_result + return op + + x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) + return func.with_body(x) + + return _transform_function(func, mod, ctx) + + +def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args): + """ + ib: ir_builder + extern: bool + True --> tvm.tir.call_extern + False --> tvm.tir.call_packed + name: str + function name + *args: + arguments for function call + """ + + def buf_from_array(ib, arr, dtype): + # Allocate enough memory to store the whole array + var = ib.allocate("int32", (len(arr),), scope="global") + for i, v in enumerate(arr): + var[i] = v + # Declare a buffer, which is basically a view on the chunk of memory that we allocated previously + buf = tvm.tir.decl_buffer((len(arr),), dtype, data=var, scope="global") + return buf + + if extern: + args = [i.data if isinstance(i, tvm.tir.Buffer) else i for i in args] + call = tvm.tir.call_extern("int32", name, *args) + else: + args = [ + buf_from_array(ib, i, "int32") if isinstance(i, (tuple, list, tvm.ir.container.Array)) else i for i in args + ] + call = tvm.tir.call_packed(name, *args) + + return call diff --git a/tests/python/unittest/test_uma_passes.py b/tests/python/unittest/test_uma_passes.py new file mode 100644 index 000000000000..f25f56df3a94 --- /dev/null +++ b/tests/python/unittest/test_uma_passes.py @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import tvm +from tvm import topi +from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass +import numpy as np +from tvm.contrib import utils, clang +import tvm.testing +from tvm import te + + +def test_external_conv2d(): + def c_to_llvm() -> str: + cc_code = """ + extern "C" int my_hw_ai_conv2dnchw(float* data, float* weight, float* result) { + result[0] = 42.0; + result[1] = 3.14; + + return 0; + } + """ + temp = utils.tempdir() + ll_path = temp.relpath("conv2d.ll") + ll_code = clang.create_llvm(cc_code, output=ll_path) + return ll_code + + target = tvm.target.Target(target="llvm", host="llvm") + dev = tvm.device(target.kind.name, 0) + + ifmap = te.placeholder((1, 3, 224, 224), dtype="float32", name="ifmap") + weights = te.placeholder((1, 3, 3, 3), dtype="float32", name="weights") + ifmap_data = tvm.nd.array(np.random.uniform(size=(1, 3, 224, 224)).astype("float32"), dev) + weight_data = tvm.nd.array(np.random.uniform(size=(1, 3, 3, 3)).astype("float32"), dev) + result_data = tvm.nd.array(np.zeros((1, 1, 224, 224)).astype("float32"), dev) + + result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) + + # Add pragma TE + s = te.create_schedule(result.op) + axis = result.op.axis + s[result].pragma(axis[0], "import_llvm", c_to_llvm()) + with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, my_ai_hw_conv2d_pass)]}): + mod = tvm.lower(s, [ifmap, weights, result], simple_mode=True) + + llvm_mod = tvm.build(mod, [ifmap, weights, result], target=target, name="test_external_conv2d") + llvm_mod(ifmap_data, weight_data, result_data) + + print(result_data) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 0], 42.0, rtol=1e-5) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 1], 3.14, rtol=1e-5) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 2], 0.0, rtol=1e-5) + + +test_external_conv2d() \ No newline at end of file From 618e83a82d96a0b6afbb2a7d967b9d7395bb2a2d Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 24 Jun 2022 11:51:02 +0000 Subject: [PATCH 040/112] [UMA] update of tutorial --- .../backend/contrib/uma/_template/passes.py | 36 ++--- tests/python/unittest/test_uma_passes.py | 140 +++++++++++++++--- 2 files changed, 135 insertions(+), 41 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index e78d52e93d06..dfbba0a23f95 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -27,35 +27,36 @@ def my_ai_hw_conv2d_pass(func, mod, ctx): _found_blocks = [] _loops = [] + _handles = [] _entry_node = None _external_function_name = "my_hw_ai_conv2dnchw" - def _has_block(self, name: str, func) -> bool: + def _has_block(name: str, func) -> bool: """ Determine of a tir.block with `name` exists in `func` """ def _hb(op): if isinstance(op, tvm.tir.Block): - my_ai_hw_conv2d_pass._found_blocks.append(op.name_hint) + _found_blocks.append(op.name_hint) - my_ai_hw_conv2d_pass._found_blocks = [] + _found_blocks = [] tvm.tir.stmt_functor.post_order_visit(func.body, _hb) - return name in my_ai_hw_conv2d_pass._found_blocks + return name in _found_blocks - def transform_function2( - self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + def _transform_function2( + func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: def _replace_conv2d(op): - if op == my_ai_hw_conv2d_pass._entry_node: + if op == _entry_node: irb = tvm.tir.ir_builder.create() # Collection of buffer address - buffers = [b[1].data for b in my_ai_hw_conv2d_pass._handles] + buffers = [b[1].data for b in _handles] # extraction of loop offsets - for i in my_ai_hw_conv2d_pass._loops: + for i in _loops: assert i.min.value == 0 - offsets = [loop.extent.value for loop in my_ai_hw_conv2d_pass._loops] + offsets = [loop.extent.value for loop in _loops] args = buffers # + offsets - external_call = tvm.tir.Evaluate(tir_call(irb, True, "my_hw_ai_conv2dnchw", *args)) + external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) mac_calls = tvm.tir.SeqStmt([external_call]) irb.emit(mac_calls) irb_result = irb.get() @@ -64,20 +65,20 @@ def _replace_conv2d(op): sch = tir.Schedule(func) - if self._has_block("conv2d_nchw", func): + if _has_block("conv2d_nchw", func): conv2d_block = sch.get_block("conv2d_nchw") rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 n, co, h, w, ci, kh, hw = rv_loops - my_ai_hw_conv2d_pass._entry_node = sch.get(rv_loops[1]) - my_ai_hw_conv2d_pass._loops = [sch.get(i) for i in rv_loops] - my_ai_hw_conv2d_pass._handles = func.buffer_map.items() + _entry_node = sch.get(rv_loops[1]) + _loops = [sch.get(i) for i in rv_loops] + _handles = func.buffer_map.items() x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) return func.with_body(x) else: - return sch.mod["main"] + return func #sch.mod["main"] def _transform_function( func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext @@ -98,7 +99,8 @@ def _replace_conv2d(op): x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) return func.with_body(x) - return _transform_function(func, mod, ctx) + r = _transform_function2(func, mod, ctx) + return r def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args): diff --git a/tests/python/unittest/test_uma_passes.py 
b/tests/python/unittest/test_uma_passes.py index f25f56df3a94..863763a851af 100644 --- a/tests/python/unittest/test_uma_passes.py +++ b/tests/python/unittest/test_uma_passes.py @@ -16,54 +16,146 @@ # under the License. import tvm -from tvm import topi +from tvm import topi, IRModule from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass import numpy as np from tvm.contrib import utils, clang import tvm.testing from tvm import te +from tvm.relay.backend.contrib.uma.api.lower import UMALower +from tvm.relay.backend.contrib.uma.api.utils import PassPhase +conv2d_c_code = """ +extern "C" int my_hw_ai_conv2dnchw(float* data, float* weight, float* result) { + result[0] = 42.0; + result[1] = 3.14; + /* + int ix = 224; + int iy = 224; + int ic = 3; + int kx = 3; + int ky = 3; -def test_external_conv2d(): - def c_to_llvm() -> str: - cc_code = """ - extern "C" int my_hw_ai_conv2dnchw(float* data, float* weight, float* result) { - result[0] = 42.0; - result[1] = 3.14; + int pad_size = ix * iy * ic; + float* pad_temp = new float[pad_size]; + if (pad_temp == nullptr) { + return -1; + } - return 0; + for (int i1 = 0; i1 < ic; ++i1) { + for (int i2 = 0; i2 < ix; ++i2) { + for (int i3 = 0; i3 < iy; ++i3) { + ((float*)pad_temp)[(((i1 * 900) + (i2 * 30)) + i3)] = (((((1 <= i2) && (i2 < 29)) && (1 <= i3)) && (i3 < 29)) ? weight[((((i1 * 784) + (i2 * 28)) + i3) - 29)] : 0.000000e+00f); + } + } + } + + for (int i11 = 0; i11 < 256; ++i11) { + for (int i21 = 0; i21 < 14; ++i21) { + for (int i31 = 0; i31 < 14; ++i31) { + for (int i4 = 0; i4 < 256; ++i4) { + for (int i5 = 0; i5 < kx; ++i5) { + for (int i6 = 0; i6 < ky; ++i6) { + int cse_var_1 = (((i11 * 196) + (i21 * 14)) + i31); + if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { + result[cse_var_1] = 0.000000e+00f; + } + result[cse_var_1] = (result[cse_var_1] + (((float*)pad_temp)[(((((i4 * 900) + (i21 * 60)) + (i5 * 30)) + (i31 * 2)) + i6)] * data[((((i11 * 2304) + (i4 * 9)) + (i5 * 3)) + i6)])); } - """ - temp = utils.tempdir() - ll_path = temp.relpath("conv2d.ll") - ll_code = clang.create_llvm(cc_code, output=ll_path) - return ll_code + } + } + } + } + } + + delete[] pad_temp; + */ + return 0; +} +""" + + +def _c_to_llvm(c_code: str) -> str: + temp = utils.tempdir() + ll_path = temp.relpath("conv2d.ll") + ll_code = clang.create_llvm([c_code], output=ll_path) + return ll_code - target = tvm.target.Target(target="llvm", host="llvm") - dev = tvm.device(target.kind.name, 0) +def _conv2d_te_definition() -> list: ifmap = te.placeholder((1, 3, 224, 224), dtype="float32", name="ifmap") weights = te.placeholder((1, 3, 3, 3), dtype="float32", name="weights") + result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) + return [ifmap, weights, result] + + +def _pepare_conv2d_schedule(): + target = tvm.target.Target(target="llvm", host="llvm") + dev = tvm.device(target.kind.name, 0) + placeholders = _conv2d_te_definition() + runtime_np_arrays = _generate_numpy_arrays(dev) + sch_tir = _add_llvm_to_tir(placeholders, conv2d_c_code) + return placeholders, runtime_np_arrays, sch_tir, target, + + +def _add_llvm_to_tir(placeholder: list, c_code_str: str): + # How to do the same with TE + # Add pragma TE + # s = te.create_schedule(result.op) + # axis = result.op.axis + # s[result].pragma(axis[0], "import_llvm", c_to_llvm()) + # with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, my_ai_hw_conv2d_pass)]}): + # mod = tvm.lower(s, [ifmap, weights, result], simple_mode=True) + # + # llvm_mod = tvm.build(mod, [ifmap, weights, 
result], target=target, name="test_external_conv2d") + # llvm_mod(ifmap_data, weight_data, result_data) + + func_tir = te.create_prim_func(placeholder) + ir_module_from_te = IRModule({"main": func_tir}) + sch_tir = tvm.tir.Schedule(ir_module_from_te) + conv2d_b = sch_tir.get_block("conv2d_nchw") + conv2d_l = sch_tir.get_loops(conv2d_b) + sch_tir.annotate(conv2d_l[0], "pragma_import_llvm", _c_to_llvm(c_code_str)) + return sch_tir + + +def _generate_numpy_arrays(dev): ifmap_data = tvm.nd.array(np.random.uniform(size=(1, 3, 224, 224)).astype("float32"), dev) weight_data = tvm.nd.array(np.random.uniform(size=(1, 3, 3, 3)).astype("float32"), dev) result_data = tvm.nd.array(np.zeros((1, 1, 224, 224)).astype("float32"), dev) + return ifmap_data, weight_data, result_data - result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) - # Add pragma TE - s = te.create_schedule(result.op) - axis = result.op.axis - s[result].pragma(axis[0], "import_llvm", c_to_llvm()) - with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, my_ai_hw_conv2d_pass)]}): - mod = tvm.lower(s, [ifmap, weights, result], simple_mode=True) +def test_lower_with_uma(): + placeholders, runtime_np_arrays, schedule, target = _pepare_conv2d_schedule() + ifmap_data, weight_data, result_data = runtime_np_arrays + + uma_lower = UMALower("lower_test") + uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass)) + with tvm.transform.PassContext(): + tir_mod = uma_lower._lower_stir_to_nstir(schedule.mod["main"]) - llvm_mod = tvm.build(mod, [ifmap, weights, result], target=target, name="test_external_conv2d") + llvm_mod = tvm.build(tir_mod, placeholders, target=target, name="test_external_conv2d") llvm_mod(ifmap_data, weight_data, result_data) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 0], 42.0, rtol=1e-5) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 1], 3.14, rtol=1e-5) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 2], 0.0, rtol=1e-5) print(result_data) + + +def test_lower_standalone(): + ifmap, ifmap_data, result, result_data, sch_tir, target, weight_data, weights = _pepare_conv2d_schedule() + tir_mod = my_ai_hw_conv2d_pass(sch_tir.mod) + llvm_mod = tvm.build(tir_mod, [ifmap, weights, result], target=target, name="test_external_conv2d") + llvm_mod(ifmap_data, weight_data, result_data) + tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 0], 42.0, rtol=1e-5) tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 1], 3.14, rtol=1e-5) tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 2], 0.0, rtol=1e-5) + print(result_data) + -test_external_conv2d() \ No newline at end of file +#test_lower_standalone() +test_lower_with_uma() From efc788f514565426ae31a3e737277c2700c3f912 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Fri, 24 Jun 2022 16:48:06 +0000 Subject: [PATCH 041/112] [UMA] update of pass format, still issue with conv2d c code --- .../backend/contrib/uma/_template/passes.py | 151 +++++++++--------- 1 file changed, 79 insertions(+), 72 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index dfbba0a23f95..dccf00b883c0 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""Transform passes for the UltraTrail accelerator""" +"""Transform passes for the my_ai_hw accelerator""" import tvm from tvm import relay, tir @@ -24,83 +24,90 @@ @tvm.tir.transform.prim_func_pass(opt_level=2) -def my_ai_hw_conv2d_pass(func, mod, ctx): - _found_blocks = [] - _loops = [] - _handles = [] - _entry_node = None - _external_function_name = "my_hw_ai_conv2dnchw" - - def _has_block(name: str, func) -> bool: - """ - Determine of a tir.block with `name` exists in `func` - """ - def _hb(op): - if isinstance(op, tvm.tir.Block): - _found_blocks.append(op.name_hint) +class my_ai_hw_conv2d_pass: + def transform_function( + self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + return self._my_ai_hw_conv2d_pass(func, mod, ctx) + @staticmethod + def _my_ai_hw_conv2d_pass(func, mod, ctx): _found_blocks = [] - tvm.tir.stmt_functor.post_order_visit(func.body, _hb) - return name in _found_blocks - - def _transform_function2( - func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - def _replace_conv2d(op): - if op == _entry_node: - irb = tvm.tir.ir_builder.create() - # Collection of buffer address - buffers = [b[1].data for b in _handles] - # extraction of loop offsets - for i in _loops: - assert i.min.value == 0 - offsets = [loop.extent.value for loop in _loops] - args = buffers # + offsets - external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) - mac_calls = tvm.tir.SeqStmt([external_call]) - irb.emit(mac_calls) - irb_result = irb.get() - return irb_result - return op - - sch = tir.Schedule(func) - - if _has_block("conv2d_nchw", func): - conv2d_block = sch.get_block("conv2d_nchw") - - rv_loops = sch.get_loops(conv2d_block) - assert len(rv_loops) == 7 - n, co, h, w, ci, kh, hw = rv_loops - _entry_node = sch.get(rv_loops[1]) - _loops = [sch.get(i) for i in rv_loops] - _handles = func.buffer_map.items() + _loops = [] + _handles = [] + _entry_node = None + _external_function_name = "my_hw_ai_conv2dnchw" + + def _has_block(name: str, func) -> bool: + """ + Determine of a tir.block with `name` exists in `func` + """ + def _hb(op): + if isinstance(op, tvm.tir.Block): + _found_blocks.append(op.name_hint) + + _found_blocks = [] + tvm.tir.stmt_functor.post_order_visit(func.body, _hb) + return name in _found_blocks + + def _transform_function2( + func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + def _replace_conv2d(op): + if op == _entry_node: + irb = tvm.tir.ir_builder.create() + # Collection of buffer address + buffers = [b[1].data for b in _handles] + # extraction of loop offsets + for i in _loops: + assert i.min.value == 0 + offsets = [loop.extent.value for loop in _loops] + args = buffers # + offsets + external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) + mac_calls = tvm.tir.SeqStmt([external_call]) + irb.emit(mac_calls) + irb_result = irb.get() + return irb_result + return op + + sch = tir.Schedule(func) + + if _has_block("conv2d_nchw", func): + conv2d_block = sch.get_block("conv2d_nchw") + + rv_loops = sch.get_loops(conv2d_block) + assert len(rv_loops) == 7 + n, co, h, w, ci, kh, hw = rv_loops + _entry_node = sch.get(rv_loops[1]) + _loops = [sch.get(i) for i in rv_loops] + _handles = func.buffer_map.items() + + x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) + return func.with_body(x) + else: + return func #sch.mod["main"] + + 
def _transform_function( + func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + def _replace_conv2d(op): + if isinstance(op, tvm.tir.For) and op.loop_var.name == "yy": + irb = tvm.tir.ir_builder.create() + # Collection of buffer address + buffers = [b[1].data for b in func.buffer_map.items()] + args = buffers # + offsets + external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) + mac_calls = tvm.tir.SeqStmt([external_call]) + irb.emit(mac_calls) + irb_result = irb.get() + return irb_result + return op x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) return func.with_body(x) - else: - return func #sch.mod["main"] - def _transform_function( - func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - def _replace_conv2d(op): - if isinstance(op, tvm.tir.For) and op.loop_var.name == "yy": - irb = tvm.tir.ir_builder.create() - # Collection of buffer address - buffers = [b[1].data for b in func.buffer_map.items()] - args = buffers # + offsets - external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) - mac_calls = tvm.tir.SeqStmt([external_call]) - irb.emit(mac_calls) - irb_result = irb.get() - return irb_result - return op - - x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) - return func.with_body(x) - - r = _transform_function2(func, mod, ctx) - return r + r = _transform_function2(func, mod, ctx) + return r def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args): From 0cf333285a7aac1af9365890acccd3a31d9471cd Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 27 Jun 2022 08:42:24 +0000 Subject: [PATCH 042/112] [UMA] refactoring of test_uma_lowering_with_umalower.py --- .../test_uma_lowering_with_umalower.py | 88 ++++++++++ tests/python/unittest/test_uma_passes.py | 161 ------------------ tests/python/unittest/test_uma_utils.py | 130 ++++++++++++++ 3 files changed, 218 insertions(+), 161 deletions(-) create mode 100644 tests/python/unittest/test_uma_lowering_with_umalower.py delete mode 100644 tests/python/unittest/test_uma_passes.py create mode 100644 tests/python/unittest/test_uma_utils.py diff --git a/tests/python/unittest/test_uma_lowering_with_umalower.py b/tests/python/unittest/test_uma_lowering_with_umalower.py new file mode 100644 index 000000000000..ffb7c1fa3c30 --- /dev/null +++ b/tests/python/unittest/test_uma_lowering_with_umalower.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import tvm +from tests.python.unittest.test_uma_utils import _create_schedule, _generate_io_arrays, conv2d_c_code +from tvm import topi, IRModule +from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass +import tvm.testing +from tvm import te +from tvm.relay.backend.contrib.uma.api.lower import UMALower +from tvm.relay.backend.contrib.uma.api.utils import PassPhase + + +def _conv2d_te_definition(shapes: dict) -> list: + n, w, h, ci, kw, kh, co = shapes["n"], shapes["w"], shapes["h"], shapes["ci"], shapes["kw"], shapes["kh"], shapes["co"], + ifmap = te.placeholder((n, ci, w, h), dtype="float32", name="ifmap") + weights = te.placeholder((co, ci, kw, kh), dtype="float32", name="weights") + result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) + return [ifmap, weights, result] + + +def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True, ): + placeholders = _conv2d_te_definition(shapes) + sch_tir = _create_schedule(placeholders, conv2d_c_code, use_external_conv2d_impl=use_external_conv2d_impl) + return placeholders, sch_tir + + +def _run_external_conv2d(dut_io_arrays, conv2d_shapes, target): + # Run conv2d with external function + placeholders, schedule = _pepare_conv2d_schedule(conv2d_shapes) + + uma_lower = UMALower("lower_test") + uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass())) + with tvm.transform.PassContext(): + tir_mod = uma_lower._lower_stir_to_nstir(schedule.mod["main"]) + + ifmap_data, weight_data, result_data = dut_io_arrays + + llvm_conv2d_mod = tvm.build(tir_mod, placeholders, target=target, name="test_external_conv2d") + llvm_conv2d_mod(ifmap_data, weight_data, result_data) + + +def _run_reference_conv2d(reference_io_arrays, conv2d_shapes, target): + placeholders, schedule = _pepare_conv2d_schedule(conv2d_shapes) + ref_mod = tvm.build(schedule.mod, placeholders, target=target, name="test_reference_conv2d") + ifmap, weights, result = reference_io_arrays + ref_mod(ifmap, weights, result) + + +def test_lower_with_uma(): + target = tvm.target.Target(target="llvm", host="llvm") + dev = tvm.device(target.kind.name, 0) + conv2d_shapes = dict(n=1, w=224, h=224, ci=3, kw=3, kh=3, co=1) + + dut_io_arrays, reference_io_arrays = _prepare_io_arrays(conv2d_shapes, dev) + + _run_external_conv2d(dut_io_arrays, conv2d_shapes, target) + _run_reference_conv2d(reference_io_arrays, conv2d_shapes, target) + + # compare results + dut_results = dut_io_arrays[2].numpy() + ref_results = reference_io_arrays[2].numpy() + tvm.testing.assert_allclose(dut_results, ref_results, rtol=1e-5) + + +def _prepare_io_arrays(conv2d_shapes, dev): + dut_io_arrays = _generate_io_arrays(conv2d_shapes, dev) + _, _, ref_result = _generate_io_arrays(conv2d_shapes, dev) + reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result] + return dut_io_arrays, reference_io_arrays + + +if __name__ == "__main__": + test_lower_with_uma() diff --git a/tests/python/unittest/test_uma_passes.py b/tests/python/unittest/test_uma_passes.py deleted file mode 100644 index 863763a851af..000000000000 --- a/tests/python/unittest/test_uma_passes.py +++ /dev/null @@ -1,161 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import tvm -from tvm import topi, IRModule -from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass -import numpy as np -from tvm.contrib import utils, clang -import tvm.testing -from tvm import te -from tvm.relay.backend.contrib.uma.api.lower import UMALower -from tvm.relay.backend.contrib.uma.api.utils import PassPhase - -conv2d_c_code = """ -extern "C" int my_hw_ai_conv2dnchw(float* data, float* weight, float* result) { - result[0] = 42.0; - result[1] = 3.14; - /* - int ix = 224; - int iy = 224; - int ic = 3; - int kx = 3; - int ky = 3; - - int pad_size = ix * iy * ic; - float* pad_temp = new float[pad_size]; - if (pad_temp == nullptr) { - return -1; - } - - for (int i1 = 0; i1 < ic; ++i1) { - for (int i2 = 0; i2 < ix; ++i2) { - for (int i3 = 0; i3 < iy; ++i3) { - ((float*)pad_temp)[(((i1 * 900) + (i2 * 30)) + i3)] = (((((1 <= i2) && (i2 < 29)) && (1 <= i3)) && (i3 < 29)) ? weight[((((i1 * 784) + (i2 * 28)) + i3) - 29)] : 0.000000e+00f); - } - } - } - - for (int i11 = 0; i11 < 256; ++i11) { - for (int i21 = 0; i21 < 14; ++i21) { - for (int i31 = 0; i31 < 14; ++i31) { - for (int i4 = 0; i4 < 256; ++i4) { - for (int i5 = 0; i5 < kx; ++i5) { - for (int i6 = 0; i6 < ky; ++i6) { - int cse_var_1 = (((i11 * 196) + (i21 * 14)) + i31); - if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { - result[cse_var_1] = 0.000000e+00f; - } - result[cse_var_1] = (result[cse_var_1] + (((float*)pad_temp)[(((((i4 * 900) + (i21 * 60)) + (i5 * 30)) + (i31 * 2)) + i6)] * data[((((i11 * 2304) + (i4 * 9)) + (i5 * 3)) + i6)])); - } - } - } - } - } - } - - delete[] pad_temp; - */ - return 0; -} -""" - - -def _c_to_llvm(c_code: str) -> str: - temp = utils.tempdir() - ll_path = temp.relpath("conv2d.ll") - ll_code = clang.create_llvm([c_code], output=ll_path) - return ll_code - - -def _conv2d_te_definition() -> list: - ifmap = te.placeholder((1, 3, 224, 224), dtype="float32", name="ifmap") - weights = te.placeholder((1, 3, 3, 3), dtype="float32", name="weights") - result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) - return [ifmap, weights, result] - - -def _pepare_conv2d_schedule(): - target = tvm.target.Target(target="llvm", host="llvm") - dev = tvm.device(target.kind.name, 0) - placeholders = _conv2d_te_definition() - runtime_np_arrays = _generate_numpy_arrays(dev) - sch_tir = _add_llvm_to_tir(placeholders, conv2d_c_code) - return placeholders, runtime_np_arrays, sch_tir, target, - - -def _add_llvm_to_tir(placeholder: list, c_code_str: str): - # How to do the same with TE - # Add pragma TE - # s = te.create_schedule(result.op) - # axis = result.op.axis - # s[result].pragma(axis[0], "import_llvm", c_to_llvm()) - # with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, my_ai_hw_conv2d_pass)]}): - # mod = tvm.lower(s, [ifmap, weights, result], simple_mode=True) - # - # llvm_mod = tvm.build(mod, [ifmap, weights, result], target=target, name="test_external_conv2d") - # llvm_mod(ifmap_data, 
weight_data, result_data) - - func_tir = te.create_prim_func(placeholder) - ir_module_from_te = IRModule({"main": func_tir}) - sch_tir = tvm.tir.Schedule(ir_module_from_te) - conv2d_b = sch_tir.get_block("conv2d_nchw") - conv2d_l = sch_tir.get_loops(conv2d_b) - sch_tir.annotate(conv2d_l[0], "pragma_import_llvm", _c_to_llvm(c_code_str)) - return sch_tir - - -def _generate_numpy_arrays(dev): - ifmap_data = tvm.nd.array(np.random.uniform(size=(1, 3, 224, 224)).astype("float32"), dev) - weight_data = tvm.nd.array(np.random.uniform(size=(1, 3, 3, 3)).astype("float32"), dev) - result_data = tvm.nd.array(np.zeros((1, 1, 224, 224)).astype("float32"), dev) - return ifmap_data, weight_data, result_data - - -def test_lower_with_uma(): - placeholders, runtime_np_arrays, schedule, target = _pepare_conv2d_schedule() - ifmap_data, weight_data, result_data = runtime_np_arrays - - uma_lower = UMALower("lower_test") - uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass)) - with tvm.transform.PassContext(): - tir_mod = uma_lower._lower_stir_to_nstir(schedule.mod["main"]) - - llvm_mod = tvm.build(tir_mod, placeholders, target=target, name="test_external_conv2d") - llvm_mod(ifmap_data, weight_data, result_data) - - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 0], 42.0, rtol=1e-5) - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 1], 3.14, rtol=1e-5) - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 2], 0.0, rtol=1e-5) - print(result_data) - - -def test_lower_standalone(): - ifmap, ifmap_data, result, result_data, sch_tir, target, weight_data, weights = _pepare_conv2d_schedule() - tir_mod = my_ai_hw_conv2d_pass(sch_tir.mod) - llvm_mod = tvm.build(tir_mod, [ifmap, weights, result], target=target, name="test_external_conv2d") - llvm_mod(ifmap_data, weight_data, result_data) - - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 0], 42.0, rtol=1e-5) - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 1], 3.14, rtol=1e-5) - tvm.testing.assert_allclose(result_data.numpy()[0, 0, 0, 2], 0.0, rtol=1e-5) - - print(result_data) - - -#test_lower_standalone() -test_lower_with_uma() diff --git a/tests/python/unittest/test_uma_utils.py b/tests/python/unittest/test_uma_utils.py new file mode 100644 index 000000000000..5be2a52541d2 --- /dev/null +++ b/tests/python/unittest/test_uma_utils.py @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
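The tests above hinge on one mechanism: hand-written C is compiled to LLVM IR with clang and attached to the outer loop of a TIR block through the "pragma_import_llvm" annotation, so that a later pass can redirect the loop nest to the imported function. A minimal sketch of just that step, assuming a working clang installation (the kernel name my_extern_noop is a placeholder, not part of this series):

import tvm
from tvm import te
from tvm.contrib import utils, clang

# Hypothetical C kernel; in the tests above this is the conv2d implementation.
c_code = 'extern "C" int my_extern_noop(float* a, float* b, float* c) { return 0; }'

def _c_to_llvm(code: str) -> str:
    # Compile the C snippet to an LLVM IR string via clang.
    temp = utils.tempdir()
    return clang.create_llvm([code], output=temp.relpath("kernel.ll"))

a = te.placeholder((16,), dtype="float32", name="a")
b = te.placeholder((16,), dtype="float32", name="b")
c = te.compute((16,), lambda i: a[i] + b[i], name="c")

# Build a schedulable TIR module and attach the compiled IR to the block's outer loop.
sch = tvm.tir.Schedule(tvm.IRModule({"main": te.create_prim_func([a, b, c])}))
loops = sch.get_loops(sch.get_block("c"))
sch.annotate(loops[0], "pragma_import_llvm", _c_to_llvm(c_code))
# sch.mod can now be lowered and built; the imported symbol is linked into the module.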
+ + +import tvm +from tvm import topi, IRModule +import numpy as np +from tvm.contrib import utils, clang +import tvm.testing +from tvm import te + +conv2d_c_code = """ +extern "C" int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result) { + result[0] = 42.0; + result[1] = 3.14; // int iw = 7; + // int ih = 7; + // int ic = 512; + // int oc = 42; + // int kh = 3; + // int kw = 3; + + + int ic = 3; + int oc = 1; + int iw = 224; + int ih = 224; + int kh = 3; + int kw = 3; + + int kw_low = kw / 2; + int kh_low = kh / 2; + int kw_high = iw + kw / 2; + int kh_high = ih + kh / 2; + + int padded_iw = iw + 2 * kw_low; + int padded_ih = ih + 2 * kh_low; + + float* pad_temp = new float[(((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw)]; + + if (pad_temp == nullptr) { + return -1; + } + + int shift = padded_iw * kh_low + kw_low; + + for (int i1 = 0; i1 < ic; ++i1) { + for (int i2 = 0; i2 < padded_ih; ++i2) { + for (int i3 = 0; i3 < padded_iw; ++i3) { + ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] = + (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) ? ifmap[((((i1 * iw * ih) + (i2 * iw)) + i3) - shift)] : 0.000000e+00f); + } + } + } + for (int i11 = 0; i11 < oc; ++i11) { + for (int i21 = 0; i21 < ih; ++i21) { + for (int i31 = 0; i31 < iw; ++i31) { + for (int i4 = 0; i4 < ic; ++i4) { + for (int i5 = 0; i5 < kh; ++i5) { + for (int i6 = 0; i6 < kw; ++i6) { + int cse_var_1 = (((i11 * iw*ih) + (i21 * iw)) + i31); + if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { + result[cse_var_1] = 0.000000e+00f; + } + result[cse_var_1] = (result[cse_var_1] + + (((float*)pad_temp)[i4 * padded_iw * padded_ih + (i21+i5) * padded_iw + i31 + i6] + * weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); + } + } + } + } + } + } + delete[] pad_temp; + return 0; +} +""" + + +def _create_schedule(placeholder: list, c_code_str: str = "", use_external_conv2d_impl: bool = True): + # How to do the same with TE + # Add pragma TE + # s = te.create_schedule(result.op) + # axis = result.op.axis + # s[result].pragma(axis[0], "import_llvm", c_to_llvm()) + # with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, my_ai_hw_conv2d_pass)]}): + # mod = tvm.lower(s, [ifmap, weights, result], simple_mode=True) + # + # llvm_mod = tvm.build(mod, [ifmap, weights, result], target=target, name="test_external_conv2d") + # llvm_mod(ifmap_data, weight_data, result_data) + + assert use_external_conv2d_impl and c_code_str != "" \ + or not use_external_conv2d_impl and c_code_str == "" + + def _c_to_llvm(c_code: str) -> str: + temp = utils.tempdir() + ll_path = temp.relpath("conv2d.ll") + ll_code = clang.create_llvm([c_code], output=ll_path) + return ll_code + + func_tir = te.create_prim_func(placeholder) + ir_module_from_te = IRModule({"main": func_tir}) + sch_tir = tvm.tir.Schedule(ir_module_from_te) + if use_external_conv2d_impl: + conv2d_b = sch_tir.get_block("conv2d_nchw") + conv2d_l = sch_tir.get_loops(conv2d_b) + sch_tir.annotate(conv2d_l[0], "pragma_import_llvm", _c_to_llvm(c_code_str)) + return sch_tir + + +def _generate_io_arrays(shapes: dict, dev): + n, w, h, ci, kw, kh, co = shapes["n"], shapes["w"], shapes["h"], shapes["ci"], shapes["kw"], shapes["kh"], shapes["co"], + + ifmap_data = tvm.nd.array(np.random.uniform(size=(n, ci, w, h)).astype("float32"), dev) + weight_data = tvm.nd.array(np.random.uniform(size=(co, ci, kh, kw)).astype("float32"), dev) + result_data = tvm.nd.array(np.zeros((n, co, w, h)).astype("float32"), dev) + 
return ifmap_data, weight_data, result_data From 2c8f94eb3bf7820f3bed4640f4c49de34908aba3 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 27 Jun 2022 09:43:01 +0000 Subject: [PATCH 043/112] [UMA] refactoring of test_uma_lowering_with_umalower.py --- .../backend/contrib/uma/_template/passes.py | 19 ------------------- .../test_uma_lowering_with_umalower.py | 14 ++++++++------ 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index dccf00b883c0..6847ccb25d4e 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -87,25 +87,6 @@ def _replace_conv2d(op): else: return func #sch.mod["main"] - def _transform_function( - func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - def _replace_conv2d(op): - if isinstance(op, tvm.tir.For) and op.loop_var.name == "yy": - irb = tvm.tir.ir_builder.create() - # Collection of buffer address - buffers = [b[1].data for b in func.buffer_map.items()] - args = buffers # + offsets - external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) - mac_calls = tvm.tir.SeqStmt([external_call]) - irb.emit(mac_calls) - irb_result = irb.get() - return irb_result - return op - - x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) - return func.with_body(x) - r = _transform_function2(func, mod, ctx) return r diff --git a/tests/python/unittest/test_uma_lowering_with_umalower.py b/tests/python/unittest/test_uma_lowering_with_umalower.py index ffb7c1fa3c30..a1b4a2dc3a19 100644 --- a/tests/python/unittest/test_uma_lowering_with_umalower.py +++ b/tests/python/unittest/test_uma_lowering_with_umalower.py @@ -33,7 +33,7 @@ def _conv2d_te_definition(shapes: dict) -> list: return [ifmap, weights, result] -def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True, ): +def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) sch_tir = _create_schedule(placeholders, conv2d_c_code, use_external_conv2d_impl=use_external_conv2d_impl) return placeholders, sch_tir @@ -61,6 +61,13 @@ def _run_reference_conv2d(reference_io_arrays, conv2d_shapes, target): ref_mod(ifmap, weights, result) +def _prepare_io_arrays(conv2d_shapes, dev): + dut_io_arrays = _generate_io_arrays(conv2d_shapes, dev) + _, _, ref_result = _generate_io_arrays(conv2d_shapes, dev) + reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result] + return dut_io_arrays, reference_io_arrays + + def test_lower_with_uma(): target = tvm.target.Target(target="llvm", host="llvm") dev = tvm.device(target.kind.name, 0) @@ -77,11 +84,6 @@ def test_lower_with_uma(): tvm.testing.assert_allclose(dut_results, ref_results, rtol=1e-5) -def _prepare_io_arrays(conv2d_shapes, dev): - dut_io_arrays = _generate_io_arrays(conv2d_shapes, dev) - _, _, ref_result = _generate_io_arrays(conv2d_shapes, dev) - reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result] - return dut_io_arrays, reference_io_arrays if __name__ == "__main__": From 3e58dfb8083b0672821677bc63ab04053dbffd7a Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Mon, 27 Jun 2022 09:47:41 +0000 Subject: [PATCH 044/112] [UMA] Adding backend, codegen, patterns, strategies and run file for MyAiHw --- .../backend/contrib/uma/_template/backend.py | 56 ++++++++ .../backend/contrib/uma/_template/codegen.py | 32 +++++ .../backend/contrib/uma/_template/patterns.py | 33 +++++ .../backend/contrib/uma/_template/run.py | 76 +++++++++++ .../contrib/uma/_template/strategies.py | 120 ++++++++++++++++++ 5 files changed, 317 insertions(+) create mode 100644 python/tvm/relay/backend/contrib/uma/_template/backend.py create mode 100644 python/tvm/relay/backend/contrib/uma/_template/codegen.py create mode 100644 python/tvm/relay/backend/contrib/uma/_template/patterns.py create mode 100644 python/tvm/relay/backend/contrib/uma/_template/run.py create mode 100644 python/tvm/relay/backend/contrib/uma/_template/strategies.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py new file mode 100644 index 000000000000..fd598afe52fe --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
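The backend added in this patch is driven from Relay roughly as follows. This is a condensed usage sketch based on the run scripts in this series (the exact target, runtime and executor options evolve in later patches), and mod/params are assumed to come from any Relay frontend:

import tvm
from tvm import relay
from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend

def build_with_my_ai_hw(mod, params):
    uma_backend = MyAiHwBackend()
    uma_backend.register()            # registers target, patterns, TIR passes and codegen hooks
    mod = uma_backend.partition(mod)  # offloads matched conv2d subgraphs to the accelerator

    target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c"))
    runtime = tvm.relay.backend.Runtime("crt")
    executor = tvm.relay.backend.Executor("aot", {"workspace-byte-alignment": 8})
    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
        return relay.build(
            mod, target=[tvm.target.Target("c"), target],
            runtime=runtime, executor=executor, params=params,
        )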
+"""UMA backend for the MyAiHw accelerator""" +from .passes import my_ai_hw_conv2d_pass +from ..api.utils import PassPhase +from ..backend import UMABackend +from .codegen import gen_includes, gen_replace_call_extern +from .patterns import conv2d_pattern + + +class MyAiHwBackend(UMABackend): + """UMA backend for the MyAiHw accelerator.""" + + def __init__(self): + super().__init__() + + ####################################################################### + # Target configuration + ####################################################################### + self._register_target_attr("dimension") + + ####################################################################### + # Relay to Relay function registration + ####################################################################### + self._register_pattern("conv2d", conv2d_pattern()) + + ####################################################################### + # Relay to TIR function registration + ####################################################################### + self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) + + + ####################################################################### + # TIR to runtime function registration + ####################################################################### + self._register_codegen( + fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern + ) + + @property + def target_name(self): + return "my_ai_hw" diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/python/tvm/relay/backend/contrib/uma/_template/codegen.py new file mode 100644 index 000000000000..80e6086a3a32 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/codegen.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""UMA codegen for the MyAiHw accelerator""" + +import tvm + + +def gen_includes() -> str: + # TODO update for tutorial + includes = "" + includes += "#include \n" + includes += "#include \n" + includes += "#include \n" + return includes + + +def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: + return "my_custom_api_function({}, {}, {})".format(*args) diff --git a/python/tvm/relay/backend/contrib/uma/_template/patterns.py b/python/tvm/relay/backend/contrib/uma/_template/patterns.py new file mode 100644 index 000000000000..9a4dbceacc1b --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/patterns.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Relay graph patterns for the $my_ai_hw accelerator""" + +from tvm.relay.dataflow_pattern import is_op, wildcard, has_attr + + +def conv2d_pattern(): + pattern = is_op("nn.conv2d")(wildcard(), wildcard()) + pattern = pattern.has_attr({"strides": [1, 1]}) + return pattern + +def dense_pattern(): + pattern = is_op("nn.dense")(wildcard(), wildcard()) + pattern = pattern.optional( + lambda x: is_op("nn.bias_add")(x, wildcard()) | is_op("add")(x, wildcard()) + ) + pattern = pattern.optional(lambda x: is_op("nn.relu")(x)) + return pattern diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py new file mode 100644 index 000000000000..8acefea695b2 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
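As an illustration of the patterns above (not part of the template), conv2d_pattern is meant to match stride-1 conv2d calls only. A quick check with the Relay dataflow pattern matcher might look like this, assuming the template module is importable:

import tvm
from tvm import relay
from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern

data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
weight = relay.var("weight", shape=(4, 3, 3, 3), dtype="float32")

stride1 = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1))
stride2 = relay.nn.conv2d(data, weight, strides=(2, 2), padding=(1, 1))

# The stride attribute constraint accepts the first call and rejects the second.
assert conv2d_pattern().match(stride1)
assert not conv2d_pattern().match(stride2)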
+ +import tvm +from tvm import relay +from tvm.contrib.download import download_testdata +from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend + +import numpy as np +import tarfile +from pathlib import Path + +import onnx + + +def main(): + model_url = "".join( + ["https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet50-v2-7.onnx"]) + model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx") + # now you have the onnx model on disk + onnx_model = onnx.load(model_path) + + input_name = "data" + shape_dict = {input_name: (1, 3, 224, 224)} + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) + + print(mod) + + # Relay target specific partitioning + uma_backend = MyAiHwBackend() + uma_backend.register() + + @tvm.register_func("relay.ext.my_ai_hw") + def uma_compiler(ref): + print(ref) + + mod = uma_backend.partition(mod) + + # Relay build (AOT C target) + TARGET = tvm.target.Target("c") + RUNTIME = tvm.relay.backend.Runtime("crt") + EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + + with tvm.transform.PassContext( + opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] + ): + module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) + + model_library_format_tar_path = Path("build/lib.tar") + model_library_format_tar_path.unlink(missing_ok=True) + model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) + + tvm.micro.export_model_library_format(module, model_library_format_tar_path) + + print("Built MLF Library: ") + with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: + print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) + tar_f.extractall(model_library_format_tar_path.parent) + + +if __name__ == "__main__": + main() diff --git a/python/tvm/relay/backend/contrib/uma/_template/strategies.py b/python/tvm/relay/backend/contrib/uma/_template/strategies.py new file mode 100644 index 000000000000..cd01a8c87132 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/strategies.py @@ -0,0 +1,120 @@ +from tvm import relay, te +from tvm.relay.op import op as _op +from tvm.topi.utils import get_const_tuple +from tvm.topi.nn.utils import get_pad_tuple1d +from tvm.relay.op.strategy.generic import wrap_compute_conv1d, wrap_topi_schedule + +import logging + +logger = logging.getLogger("strategy") + +def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): + """1D convolution forward operator for NCW layout. + + Parameters + ---------- + data : tvm.te.Tensor + 3-D with shape [batch, in_channel, in_width] + + kernel : tvm.te.Tensor + 3-D with shape [num_filter, in_channel, filter_size] + + strides : int or tuple + The spatial stride along width + + padding : int, tuple, or str + Padding size can be an integer for equal padding, + a tuple of (left, right) or a string in ['VALID', 'SAME']. + + dilation : int or tuple + Dilation rate if convolution should be dilated. + + out_dtype : str + The output data type. If None then output is same type as input. 
+ """ + s = strides + d = dilation + if out_dtype is None: + out_dtype = data.dtype + if isinstance(strides, (tuple, list)): + s = strides[0] + if isinstance(dilation, (tuple, list)): + d = dilation[0] + + batch, in_channels, data_width = data.shape + out_channels, _, kernel_size = kernel.shape + + # Compute padding and out width + pad_left, pad_right = get_pad_tuple1d(padding, (kernel_size,)) + if pad_left != pad_right: + raise ValueError("Padding has to be symmetric. Got %d %d" % pad_left, pad_right) + p = pad_left + out_width = (data_width + 2 * p - kernel_size - (kernel_size - 1) * (d - 1)) // s + 1 + + # Compute graph + rc = te.reduce_axis((0, in_channels), name="rc") + rx = te.reduce_axis((0, kernel_size), name="rx") + return te.compute( + (batch, out_channels, out_width), + lambda nn, kk, xx: te.sum( + te.if_then_else( + te.any(s * xx + d * rx - p < 0, s * xx + d * rx - p >= data_width), + 0.0, + data[nn, rc, s * xx + d * rx - p].astype(out_dtype) + * kernel[kk, rc, rx].astype(out_dtype), + ), + axis=[rc, rx], + ), + tag="custom_conv1d_ncw", + ) + + +# TVM integration: Add schedule to `python/tvm/topi/generic/nn.py` +def schedule_conv1d_ncw(outs): + """Schedule for conv1d_ncw + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of conv1d_ncw + in the format of an array of tensors. + + Returns + ------- + sch: Schedule + The computation schedule for the op. + """ + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + nn, kk, xx = s[outs[0]].op.axis + rc, rx = s[outs[0]].op.reduce_axis + + # kk_outer, kk_inner = s[outs[0]].split(kk, 8) + # xx_outer, xx_inner = s[outs[0]].split(xx, 1) + + # s[outs[0]].reorder(kk_outer, xx_outer, kk_inner, xx_inner) + # s[outs[0]].vectorize(xx) + # s[outs[0]].unroll(rc) + + return s + + +# TVM integration: Add strategy to `python/tvm/relay/op/strategy/generic.py` +@relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") +def custom_conv1d_strategy(attrs, inputs, out_type, target): + """custom conv1d generic strategy""" + logger.warning("custom conv1d is not optimized for this platform.") + layout = attrs.data_layout + dilation = get_const_tuple(attrs.dilation) + if dilation[0] < 1: + raise ValueError("dilation should be a positive value") + strategy = _op.OpStrategy() + if layout == "NCW": + strategy.add_implementation( + wrap_compute_conv1d(conv1d_ncw), + wrap_topi_schedule(schedule_conv1d_ncw), + name="custom_conv1d_ncw.generic", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) + return strategy From 120f32cd67e0f711b786dbdf0de608338d771015 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Tue, 28 Jun 2022 14:42:07 +0000 Subject: [PATCH 045/112] [UMA] update towards my_ai_hw usecase --- .../backend/contrib/uma/_template/backend.py | 6 +- .../backend/contrib/uma/_template/codegen.py | 7 +- .../backend/contrib/uma/_template/passes.py | 48 ++++--- .../backend/contrib/uma/_template/patterns.py | 3 +- .../backend/contrib/uma/_template/run.py | 17 ++- .../contrib/uma/_template/strategies.py | 132 +++--------------- .../relay/backend/contrib/uma/api/codegen.py | 20 +-- .../relay/backend/contrib/uma/api/utils.py | 21 +++ .../test_uma_lowering_with_umalower.py | 4 +- tests/python/unittest/test_uma_utils.py | 18 +-- 10 files changed, 106 insertions(+), 170 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index fd598afe52fe..7880f1c69b30 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""UMA backend for the MyAiHw accelerator""" +"""UMA backend for the my_ai_hw accelerator""" from .passes import my_ai_hw_conv2d_pass from ..api.utils import PassPhase from ..backend import UMABackend @@ -41,14 +41,14 @@ def __init__(self): ####################################################################### # Relay to TIR function registration ####################################################################### - self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) + #self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) ####################################################################### # TIR to runtime function registration ####################################################################### self._register_codegen( - fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern + fmt="c", includes=gen_includes #, replace_call_extern=gen_replace_call_extern ) @property diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/python/tvm/relay/backend/contrib/uma/_template/codegen.py index 80e6086a3a32..16d3ef9f5f86 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/_template/codegen.py @@ -14,17 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-"""UMA codegen for the MyAiHw accelerator""" +"""UMA codegen for the my_ai_hw accelerator""" import tvm def gen_includes() -> str: - # TODO update for tutorial includes = "" - includes += "#include \n" - includes += "#include \n" - includes += "#include \n" + includes += "#include \"conv2dnchw.cpp\"" return includes diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 6847ccb25d4e..06507c8189da 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -18,9 +18,7 @@ import tvm from tvm import relay, tir -from tvm.topi.utils import prod - -from collections import OrderedDict +from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block @tvm.tir.transform.prim_func_pass(opt_level=2) @@ -33,15 +31,17 @@ def transform_function( @staticmethod def _my_ai_hw_conv2d_pass(func, mod, ctx): _found_blocks = [] - _loops = [] + _loops = dict() _handles = [] _entry_node = None _external_function_name = "my_hw_ai_conv2dnchw" + _tvm_block_match_name = "conv2d_nchw" def _has_block(name: str, func) -> bool: """ Determine of a tir.block with `name` exists in `func` """ + def _hb(op): if isinstance(op, tvm.tir.Block): _found_blocks.append(op.name_hint) @@ -50,7 +50,7 @@ def _hb(op): tvm.tir.stmt_functor.post_order_visit(func.body, _hb) return name in _found_blocks - def _transform_function2( + def _transform_function( func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: def _replace_conv2d(op): @@ -59,11 +59,14 @@ def _replace_conv2d(op): # Collection of buffer address buffers = [b[1].data for b in _handles] # extraction of loop offsets - for i in _loops: - assert i.min.value == 0 - offsets = [loop.extent.value for loop in _loops] - args = buffers # + offsets - external_call = tvm.tir.Evaluate(tir_call(irb, True, _external_function_name, *args)) + for k, v in _loops.items(): + assert v.min.value == 0 + offset_order = ["co", "w", "h", "ci", "kh", "kw"] + offsets = [_loops[i].extent.value for i in offset_order] + args = buffers + offsets + external_call = tvm.tir.Evaluate( + tir_call(irb, True, _external_function_name, *args) + ) mac_calls = tvm.tir.SeqStmt([external_call]) irb.emit(mac_calls) irb_result = irb.get() @@ -72,22 +75,30 @@ def _replace_conv2d(op): sch = tir.Schedule(func) - if _has_block("conv2d_nchw", func): - conv2d_block = sch.get_block("conv2d_nchw") + if _has_block(_tvm_block_match_name, func): + conv2d_block = sch.get_block(_tvm_block_match_name) rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 - n, co, h, w, ci, kh, hw = rv_loops + loops = dict( + n=rv_loops[0], + co=rv_loops[1], + h=rv_loops[2], + w=rv_loops[3], + ci=rv_loops[4], + kh=rv_loops[5], + kw=rv_loops[6], + ) _entry_node = sch.get(rv_loops[1]) - _loops = [sch.get(i) for i in rv_loops] + _loops = {k: sch.get(v) for k, v in loops.items()} _handles = func.buffer_map.items() x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) return func.with_body(x) else: - return func #sch.mod["main"] + return func - r = _transform_function2(func, mod, ctx) + r = _transform_function(func, mod, ctx) return r @@ -117,7 +128,10 @@ def buf_from_array(ib, arr, dtype): call = tvm.tir.call_extern("int32", name, *args) else: args = [ - buf_from_array(ib, i, "int32") if isinstance(i, (tuple, list, tvm.ir.container.Array)) else i for i in args + buf_from_array(ib, i, "int32") + if isinstance(i, 
(tuple, list, tvm.ir.container.Array)) + else i + for i in args ] call = tvm.tir.call_packed(name, *args) diff --git a/python/tvm/relay/backend/contrib/uma/_template/patterns.py b/python/tvm/relay/backend/contrib/uma/_template/patterns.py index 9a4dbceacc1b..e4084c81c1e6 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/patterns.py +++ b/python/tvm/relay/backend/contrib/uma/_template/patterns.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Relay graph patterns for the $my_ai_hw accelerator""" +"""Relay graph patterns for the my_ai_hw accelerator""" from tvm.relay.dataflow_pattern import is_op, wildcard, has_attr @@ -24,6 +24,7 @@ def conv2d_pattern(): pattern = pattern.has_attr({"strides": [1, 1]}) return pattern + def dense_pattern(): pattern = is_op("nn.dense")(wildcard(), wildcard()) pattern = pattern.optional( diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index 8acefea695b2..4b84f1ed2752 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -51,12 +51,23 @@ def uma_compiler(ref): mod = uma_backend.partition(mod) # Relay build (AOT C target) - TARGET = tvm.target.Target("c") + TARGET = "c" RUNTIME = tvm.relay.backend.Runtime("crt") - EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + EXECUTOR = tvm.relay.backend.Executor( + "aot", + { + "workspace-byte-alignment": 8, + }, + ) with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] + opt_level=3, + config={"tir.disable_vectorize": True, + "tir.disable_storage_rewrite": True, + "tir.usmp.enable": True, + "tir.usmp.algorithm": "greedy_by_conflicts" + }, + disabled_pass=["AlterOpLayout"] ): module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) diff --git a/python/tvm/relay/backend/contrib/uma/_template/strategies.py b/python/tvm/relay/backend/contrib/uma/_template/strategies.py index cd01a8c87132..101c62bd0a00 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/strategies.py +++ b/python/tvm/relay/backend/contrib/uma/_template/strategies.py @@ -1,3 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
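For reference, the rewritten pass passes the buffer handles first and then the loop extents in the order co, w, h, ci, kh, kw, which is the scalar argument order expected by the external conv2d kernel introduced later in this series. A rough standalone sketch of applying the pass, adapted from the earlier standalone test (so details may differ from the UMALower path) and assuming the template passes module is importable:

import tvm
from tvm import te, topi
from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass

ifmap = te.placeholder((1, 3, 224, 224), dtype="float32", name="ifmap")
weights = te.placeholder((4, 3, 3, 3), dtype="float32", name="weights")
result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1)

mod = tvm.IRModule({"main": te.create_prim_func([ifmap, weights, result])})
mod = my_ai_hw_conv2d_pass()(mod)  # instantiate the TIR pass and apply it to the module
print(mod)  # the conv2d loop nest is replaced by a call to my_hw_ai_conv2dnchw(..., co, w, h, ci, kh, kw)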
+"""Strategies for the my_ai_hw accelerator""" + + from tvm import relay, te from tvm.relay.op import op as _op from tvm.topi.utils import get_const_tuple @@ -5,116 +24,3 @@ from tvm.relay.op.strategy.generic import wrap_compute_conv1d, wrap_topi_schedule import logging - -logger = logging.getLogger("strategy") - -def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): - """1D convolution forward operator for NCW layout. - - Parameters - ---------- - data : tvm.te.Tensor - 3-D with shape [batch, in_channel, in_width] - - kernel : tvm.te.Tensor - 3-D with shape [num_filter, in_channel, filter_size] - - strides : int or tuple - The spatial stride along width - - padding : int, tuple, or str - Padding size can be an integer for equal padding, - a tuple of (left, right) or a string in ['VALID', 'SAME']. - - dilation : int or tuple - Dilation rate if convolution should be dilated. - - out_dtype : str - The output data type. If None then output is same type as input. - """ - s = strides - d = dilation - if out_dtype is None: - out_dtype = data.dtype - if isinstance(strides, (tuple, list)): - s = strides[0] - if isinstance(dilation, (tuple, list)): - d = dilation[0] - - batch, in_channels, data_width = data.shape - out_channels, _, kernel_size = kernel.shape - - # Compute padding and out width - pad_left, pad_right = get_pad_tuple1d(padding, (kernel_size,)) - if pad_left != pad_right: - raise ValueError("Padding has to be symmetric. Got %d %d" % pad_left, pad_right) - p = pad_left - out_width = (data_width + 2 * p - kernel_size - (kernel_size - 1) * (d - 1)) // s + 1 - - # Compute graph - rc = te.reduce_axis((0, in_channels), name="rc") - rx = te.reduce_axis((0, kernel_size), name="rx") - return te.compute( - (batch, out_channels, out_width), - lambda nn, kk, xx: te.sum( - te.if_then_else( - te.any(s * xx + d * rx - p < 0, s * xx + d * rx - p >= data_width), - 0.0, - data[nn, rc, s * xx + d * rx - p].astype(out_dtype) - * kernel[kk, rc, rx].astype(out_dtype), - ), - axis=[rc, rx], - ), - tag="custom_conv1d_ncw", - ) - - -# TVM integration: Add schedule to `python/tvm/topi/generic/nn.py` -def schedule_conv1d_ncw(outs): - """Schedule for conv1d_ncw - - Parameters - ---------- - outs: Array of Tensor - The computation graph description of conv1d_ncw - in the format of an array of tensors. - - Returns - ------- - sch: Schedule - The computation schedule for the op. 
- """ - outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs - s = te.create_schedule([x.op for x in outs]) - nn, kk, xx = s[outs[0]].op.axis - rc, rx = s[outs[0]].op.reduce_axis - - # kk_outer, kk_inner = s[outs[0]].split(kk, 8) - # xx_outer, xx_inner = s[outs[0]].split(xx, 1) - - # s[outs[0]].reorder(kk_outer, xx_outer, kk_inner, xx_inner) - # s[outs[0]].vectorize(xx) - # s[outs[0]].unroll(rc) - - return s - - -# TVM integration: Add strategy to `python/tvm/relay/op/strategy/generic.py` -@relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") -def custom_conv1d_strategy(attrs, inputs, out_type, target): - """custom conv1d generic strategy""" - logger.warning("custom conv1d is not optimized for this platform.") - layout = attrs.data_layout - dilation = get_const_tuple(attrs.dilation) - if dilation[0] < 1: - raise ValueError("dilation should be a positive value") - strategy = _op.OpStrategy() - if layout == "NCW": - strategy.add_implementation( - wrap_compute_conv1d(conv1d_ncw), - wrap_topi_schedule(schedule_conv1d_ncw), - name="custom_conv1d_ncw.generic", - ) - else: - raise ValueError("Unsupported conv1d layout {}".format(layout)) - return strategy diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index 8e7f6ccbfd94..92a9b8d647ca 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -33,16 +33,18 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: def _register_c_codegen( self, - includes: Callable[[], str], - replace_call_extern: Callable[[tvm.ir.container.Array], str], + includes: Callable[[], str] = None, + replace_call_extern: Callable[[tvm.ir.container.Array], str] = None, ) -> None: - tvm._ffi.register_func( - "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes - ) - tvm._ffi.register_func( - "relay.ext.uma.codegen_c_replace_call_extern_{}".format(self.target_name), - replace_call_extern, - ) + if includes is not None: + tvm._ffi.register_func( + "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes + ) + if replace_call_extern is not None: + tvm._ffi.register_func( + "relay.ext.uma.codegen_c_replace_call_extern_{}".format(self.target_name), + replace_call_extern, + ) def register(self) -> None: pass diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index f0d7de1dec87..0e4a21b2d214 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -19,6 +19,11 @@ from enum import Enum, auto # TODO: naming +import tvm.tir +from tvm.contrib import utils, clang +import uuid + + class PassPhase(Enum): """UMA pass phases.""" @@ -29,3 +34,19 @@ class PassPhase(Enum): TIR_PHASE_1 = auto() TIR_PHASE_2 = auto() TIR_PHASE_3 = auto() + + +def _c_to_llvm(c_code: str) -> str: + unique_filename = str(uuid.uuid4()) + temp = utils.tempdir() + ll_path = temp.relpath(f"{unique_filename}.ll") + ll_code = clang.create_llvm([c_code], output=ll_path) + return ll_code + + +def add_llvm_to_block(sch: tvm.tir.Schedule, block_name: str, c_code_str: str = "") -> tvm.tir.Schedule: + block = sch.get_block(block_name) + loops = sch.get_loops(block) + assert len(loops) > 0 + sch.annotate(loops[0], "pragma_import_llvm", _c_to_llvm(c_code_str)) + return sch diff --git a/tests/python/unittest/test_uma_lowering_with_umalower.py 
b/tests/python/unittest/test_uma_lowering_with_umalower.py index a1b4a2dc3a19..4470ef723797 100644 --- a/tests/python/unittest/test_uma_lowering_with_umalower.py +++ b/tests/python/unittest/test_uma_lowering_with_umalower.py @@ -17,7 +17,7 @@ import tvm from tests.python.unittest.test_uma_utils import _create_schedule, _generate_io_arrays, conv2d_c_code -from tvm import topi, IRModule +from tvm import topi from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass import tvm.testing from tvm import te @@ -84,7 +84,5 @@ def test_lower_with_uma(): tvm.testing.assert_allclose(dut_results, ref_results, rtol=1e-5) - - if __name__ == "__main__": test_lower_with_uma() diff --git a/tests/python/unittest/test_uma_utils.py b/tests/python/unittest/test_uma_utils.py index 5be2a52541d2..ad359cec7c20 100644 --- a/tests/python/unittest/test_uma_utils.py +++ b/tests/python/unittest/test_uma_utils.py @@ -24,22 +24,8 @@ from tvm import te conv2d_c_code = """ -extern "C" int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result) { - result[0] = 42.0; - result[1] = 3.14; // int iw = 7; - // int ih = 7; - // int ic = 512; - // int oc = 42; - // int kh = 3; - // int kw = 3; - - - int ic = 3; - int oc = 1; - int iw = 224; - int ih = 224; - int kh = 3; - int kw = 3; +extern "C" int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result, + int oc, int iw, int ih, int ic, int kh, int kw) { int kw_low = kw / 2; int kh_low = kh / 2; From 1b5cff35bce9dd174ebe2157286e9bb5edb0d8ef Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 29 Jun 2022 08:57:45 +0000 Subject: [PATCH 046/112] [UMA] working testcase for conv2d with uma --- .../contrib/uma/_template/conv2dnchw.cpp | 69 +++++++++++++++++++ .../test_uma_lowering_with_umalower.py | 26 +++++-- tests/python/unittest/test_uma_utils.py | 65 +++-------------- 3 files changed, 99 insertions(+), 61 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp new file mode 100644 index 000000000000..fa3d6a5295d8 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp @@ -0,0 +1,69 @@ +/* +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+*/ + +#ifdef __cplusplus +extern "C" +#endif +int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result, + int oc, int iw, int ih, int ic, int kh, int kw) { + + int kw_low = kw / 2; + int kh_low = kh / 2; + int kw_high = iw + kw / 2; + int kh_high = ih + kh / 2; + + int padded_iw = iw + 2 * kw_low; + int padded_ih = ih + 2 * kh_low; + + float* pad_temp = new float[(((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw)]; + + if (pad_temp == nullptr) { + return -1; + } + + for (int i1 = 0; i1 < ic; ++i1) { + for (int i2 = 0; i2 < padded_ih; ++i2) { + for (int i3 = 0; i3 < padded_iw; ++i3) { + ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] = + (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) ? ifmap[((((i1 * iw * ih) + ((i2-kh_low) * iw)) + i3 - kw_low) )] : 0.000000e+00f); + } + } + } + for (int i11 = 0; i11 < oc; ++i11) { + for (int i21 = 0; i21 < ih; ++i21) { + for (int i31 = 0; i31 < iw; ++i31) { + for (int i4 = 0; i4 < ic; ++i4) { + for (int i5 = 0; i5 < kh; ++i5) { + for (int i6 = 0; i6 < kw; ++i6) { + int cse_var_1 = (((i11 * iw*ih) + (i21 * iw)) + i31); + if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { + result[cse_var_1] = 0.000000e+00f; + } + result[cse_var_1] = (result[cse_var_1] + + (((float*)pad_temp)[i4 * padded_iw * padded_ih + (i21+i5) * padded_iw + i31 + i6] + * weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); + } + } + } + } + } + } + delete[] pad_temp; + return 0; +} \ No newline at end of file diff --git a/tests/python/unittest/test_uma_lowering_with_umalower.py b/tests/python/unittest/test_uma_lowering_with_umalower.py index 4470ef723797..210528a0c842 100644 --- a/tests/python/unittest/test_uma_lowering_with_umalower.py +++ b/tests/python/unittest/test_uma_lowering_with_umalower.py @@ -14,9 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
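A small worked check of the padding arithmetic used by the kernel above (illustrative only): for an odd kernel size the implicit padding is kw/2 per side, so the output keeps the input spatial size, which matches the padding=[kw//2, kh//2] used in the updated TE definition below.

# Worked example of the "same"-padding sizes computed by the C kernel.
kw = kh = 3
iw = ih = 224
kw_low, kh_low = kw // 2, kh // 2                         # 1, 1
padded_iw, padded_ih = iw + 2 * kw_low, ih + 2 * kh_low   # 226, 226
out_w, out_h = padded_iw - kw + 1, padded_ih - kh + 1     # 224, 224
assert (out_w, out_h) == (iw, ih)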
+import pytest import tvm -from tests.python.unittest.test_uma_utils import _create_schedule, _generate_io_arrays, conv2d_c_code +from tests.python.unittest.test_uma_utils import _create_schedule, _generate_io_arrays from tvm import topi from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass import tvm.testing @@ -29,13 +30,14 @@ def _conv2d_te_definition(shapes: dict) -> list: n, w, h, ci, kw, kh, co = shapes["n"], shapes["w"], shapes["h"], shapes["ci"], shapes["kw"], shapes["kh"], shapes["co"], ifmap = te.placeholder((n, ci, w, h), dtype="float32", name="ifmap") weights = te.placeholder((co, ci, kw, kh), dtype="float32", name="weights") - result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=1, dilation=1) + result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=[kw//2, kh//2], dilation=1) return [ifmap, weights, result] def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) - sch_tir = _create_schedule(placeholders, conv2d_c_code, use_external_conv2d_impl=use_external_conv2d_impl) + with open("../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp") as f: + sch_tir = _create_schedule(placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl) return placeholders, sch_tir @@ -67,11 +69,21 @@ def _prepare_io_arrays(conv2d_shapes, dev): reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result] return dut_io_arrays, reference_io_arrays - -def test_lower_with_uma(): +@pytest.mark.parametrize( + "n, w, h, ci, kw, kh, co", + [ + (1, 224, 224, 3, 3, 3, 4), + (1, 224, 224, 3, 5, 5, 4), + (1, 224, 224, 3, 7, 7, 4), + (1, 224, 320, 3, 7, 7, 4), + (1, 224, 224, 3, 7, 7, 4), + + ], +) +def test_lower_with_uma(n, w, h, ci, kw, kh, co): target = tvm.target.Target(target="llvm", host="llvm") dev = tvm.device(target.kind.name, 0) - conv2d_shapes = dict(n=1, w=224, h=224, ci=3, kw=3, kh=3, co=1) + conv2d_shapes = dict(n=n, w=w, h=h, ci=ci, kw=kw, kh=kh, co=co) dut_io_arrays, reference_io_arrays = _prepare_io_arrays(conv2d_shapes, dev) @@ -85,4 +97,4 @@ def test_lower_with_uma(): if __name__ == "__main__": - test_lower_with_uma() + test_lower_with_uma(1, 224, 224, 3, 3, 3, 4) diff --git a/tests/python/unittest/test_uma_utils.py b/tests/python/unittest/test_uma_utils.py index ad359cec7c20..15f06d01fa6e 100644 --- a/tests/python/unittest/test_uma_utils.py +++ b/tests/python/unittest/test_uma_utils.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- +import io import tvm from tvm import topi, IRModule @@ -22,61 +22,12 @@ from tvm.contrib import utils, clang import tvm.testing from tvm import te - -conv2d_c_code = """ -extern "C" int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result, - int oc, int iw, int ih, int ic, int kh, int kw) { - - int kw_low = kw / 2; - int kh_low = kh / 2; - int kw_high = iw + kw / 2; - int kh_high = ih + kh / 2; - - int padded_iw = iw + 2 * kw_low; - int padded_ih = ih + 2 * kh_low; - - float* pad_temp = new float[(((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw)]; - - if (pad_temp == nullptr) { - return -1; - } - - int shift = padded_iw * kh_low + kw_low; - - for (int i1 = 0; i1 < ic; ++i1) { - for (int i2 = 0; i2 < padded_ih; ++i2) { - for (int i3 = 0; i3 < padded_iw; ++i3) { - ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] = - (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) ? ifmap[((((i1 * iw * ih) + (i2 * iw)) + i3) - shift)] : 0.000000e+00f); - } - } - } - for (int i11 = 0; i11 < oc; ++i11) { - for (int i21 = 0; i21 < ih; ++i21) { - for (int i31 = 0; i31 < iw; ++i31) { - for (int i4 = 0; i4 < ic; ++i4) { - for (int i5 = 0; i5 < kh; ++i5) { - for (int i6 = 0; i6 < kw; ++i6) { - int cse_var_1 = (((i11 * iw*ih) + (i21 * iw)) + i31); - if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { - result[cse_var_1] = 0.000000e+00f; - } - result[cse_var_1] = (result[cse_var_1] - + (((float*)pad_temp)[i4 * padded_iw * padded_ih + (i21+i5) * padded_iw + i31 + i6] - * weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); - } - } - } - } - } - } - delete[] pad_temp; - return 0; -} -""" +from typing import Union -def _create_schedule(placeholder: list, c_code_str: str = "", use_external_conv2d_impl: bool = True): +def _create_schedule(placeholder: list, + c_code: Union[str, io.TextIOWrapper] = "", + use_external_conv2d_impl: bool = True): # How to do the same with TE # Add pragma TE # s = te.create_schedule(result.op) @@ -87,6 +38,12 @@ def _create_schedule(placeholder: list, c_code_str: str = "", use_external_conv2 # # llvm_mod = tvm.build(mod, [ifmap, weights, result], target=target, name="test_external_conv2d") # llvm_mod(ifmap_data, weight_data, result_data) + if isinstance(c_code, io.TextIOWrapper): + c_code_str = c_code.read() + elif isinstance(c_code, str): + c_code_str = c_code + else: + raise TypeError() assert use_external_conv2d_impl and c_code_str != "" \ or not use_external_conv2d_impl and c_code_str == "" From eeb0516c4ab108a68159e07a8c386cdb9301a013 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 29 Jun 2022 09:07:33 +0000 Subject: [PATCH 047/112] [UMA] testcase --- python/tvm/relay/backend/contrib/uma/_template/codegen.py | 2 +- python/tvm/relay/backend/contrib/uma/_template/passes.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/python/tvm/relay/backend/contrib/uma/_template/codegen.py index 16d3ef9f5f86..d6587b55ea13 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/_template/codegen.py @@ -21,7 +21,7 @@ def gen_includes() -> str: includes = "" - includes += "#include \"conv2dnchw.cpp\"" + includes += "#include \"../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp\"" return includes diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 06507c8189da..4f9f712332b1 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -77,7 +77,6 @@ def _replace_conv2d(op): if _has_block(_tvm_block_match_name, func): conv2d_block = sch.get_block(_tvm_block_match_name) - rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 loops = dict( From 5f9680daba6a37f12cd3d73a50dc8c685e21618e Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 29 Jun 2022 15:13:36 +0000 Subject: [PATCH 048/112] [UMA] uma lower.py: replaced outdated function create_prim_func_from_outputs to be compatible withe latest content of "main" --- .../backend/contrib/uma/_template/backend.py | 2 +- .../relay/backend/contrib/uma/api/lower.py | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 7880f1c69b30..1c5ce9473a52 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -41,7 +41,7 @@ def __init__(self): ####################################################################### # Relay to TIR function registration ####################################################################### - #self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) + self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) ####################################################################### diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index d1afa8879c30..5eeaf7d49daf 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -57,9 +57,33 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. 
""" + def _get_tensors(te_cached_func): + outputs = list(te_cached_func.outputs) + stack = [] + visited = set() + for o in outputs: + if o not in visited: + visited.add(o) + stack.append(o) + + args = [] + while len(stack) != 0: + tensor = stack.pop() + if isinstance(tensor.op, tvm.te.tensor.PlaceholderOp): + args.append(tensor) + elif isinstance(tensor.op, tvm.te.tensor.ComputeOp): + inputs = tensor.op.input_tensors + for i0 in inputs: + if i0 not in visited: + visited.add(i0) + stack.append(i0) + + return args + outputs + f = tvm._ffi.get_global_func("relay.backend.LowerToTE") te_cached_func = f(relay_prim_func) - tir_prim_func = te.create_prim_func_from_outputs(te_cached_func.outputs) + x = _get_tensors(te_cached_func) + tir_prim_func = te.create_prim_func(x) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) From d2cae76a7e5f14acf2554a4cb86350ccc7e4d725 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Tue, 5 Jul 2022 18:34:49 +0200 Subject: [PATCH 049/112] UMA: Move torch import to top to avoid free(): invalid pointer error --- python/tvm/relay/backend/contrib/uma/run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py index 550e05ade818..1071883faeda 100644 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ b/python/tvm/relay/backend/contrib/uma/run.py @@ -1,8 +1,10 @@ +import torch + import tvm from tvm import relay from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend -import torch + import tarfile from pathlib import Path From ecc4a04acbcdb16569db03a9787cbf08966a399c Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Tue, 5 Jul 2022 18:49:12 +0200 Subject: [PATCH 050/112] Add stub files for targets --- .../python/contrib/test_uma/test_partition.py | 19 +++++++++++++++++++ tests/python/contrib/test_uma/test_target.py | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/python/contrib/test_uma/test_partition.py create mode 100644 tests/python/contrib/test_uma/test_target.py diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py new file mode 100644 index 000000000000..861bf5ff4da2 --- /dev/null +++ b/tests/python/contrib/test_uma/test_partition.py @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +# TODO: cgerum \ No newline at end of file diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py new file mode 100644 index 000000000000..861bf5ff4da2 --- /dev/null +++ b/tests/python/contrib/test_uma/test_target.py @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# TODO: cgerum \ No newline at end of file From 40dd8207cfbaa1960738661ce2420c403ec59617 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Tue, 5 Jul 2022 18:59:40 +0200 Subject: [PATCH 051/112] Add tests for ultratrail codegen --- .../python/contrib/test_ultra_trail_codegen.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py => tests/python/contrib/test_ultra_trail_codegen.py (100%) diff --git a/python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py b/tests/python/contrib/test_ultra_trail_codegen.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/test_uma/test_uma.py rename to tests/python/contrib/test_ultra_trail_codegen.py From d86fd96dbec2feb1c241f270d202515d82d7ec08 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Tue, 5 Jul 2022 19:07:30 +0200 Subject: [PATCH 052/112] Adopt my_ai_hw accelerator for new target definition --- python/tvm/relay/backend/contrib/uma/_template/backend.py | 2 +- python/tvm/relay/backend/contrib/uma/_template/run.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 1c5ce9473a52..8cdd2bd19e4c 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -48,7 +48,7 @@ def __init__(self): # TIR to runtime function registration ####################################################################### self._register_codegen( - fmt="c", includes=gen_includes #, replace_call_extern=gen_replace_call_extern + fmt="c", includes=gen_includes ) @property diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index 4b84f1ed2752..ef0f0f1e35fd 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -51,7 +51,8 @@ def uma_compiler(ref): mod = uma_backend.partition(mod) # Relay build (AOT C target) - TARGET = "c" + TARGET = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) + GENERIC_TARGET = tvm.target.Target("c") RUNTIME = tvm.relay.backend.Runtime("crt") EXECUTOR = tvm.relay.backend.Executor( "aot", @@ -69,7 +70,7 @@ def uma_compiler(ref): }, disabled_pass=["AlterOpLayout"] ): - module = relay.build(mod, target=TARGET, runtime=RUNTIME, 
executor=EXECUTOR, params=params) + module = relay.build(mod, target=[GENERIC_TARGET, TARGET], runtime=RUNTIME, executor=EXECUTOR, params=params) model_library_format_tar_path = Path("build/lib.tar") model_library_format_tar_path.unlink(missing_ok=True) From 2f88286aeeead4e6a115657a878e19c6cc37ae4b Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 6 Jul 2022 10:39:12 +0200 Subject: [PATCH 053/112] Add unit test for target attributes --- tests/python/contrib/test_uma/test_target.py | 28 +++++++++++++++++++- tests/scripts/task_config_build_cortexm.sh | 2 ++ tests/scripts/task_config_build_cpu.sh | 1 + tests/scripts/task_python_uma.sh | 24 +++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100755 tests/scripts/task_python_uma.sh diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index 861bf5ff4da2..4f958f15ddd3 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -15,5 +15,31 @@ # specific language governing permissions and limitations # under the License. +import pytest +import tvm -# TODO: cgerum \ No newline at end of file +@pytest.mark.parametrize( + "target_name,target_attrs,target_args", + [("my_hwa", {}, {}), + ("my_hwa2", {"local_memory_size": 128*1024}, {"local_memory_size": 256*1024})] +) +def test_uma_target(target_name, target_attrs, target_args): + registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") + registration_func(target_name, target_attrs) + + # Test Defaults + my_target = tvm.target.Target(target_name) + + assert str(my_target.kind) == target_name + + for attr in target_attrs.keys(): + assert my_target.attrs[attr] == target_attrs[attr] + + + # Test with parameters overwritten + + args = " ".join((F"--{k}={v}" for k,v in target_args.items())) + my_target = tvm.target.Target(f"{target_name} {args}") + + for attr in target_args.keys(): + assert my_target.attrs[attr] == target_args[attr] \ No newline at end of file diff --git a/tests/scripts/task_config_build_cortexm.sh b/tests/scripts/task_config_build_cortexm.sh index 29869983b86d..35dbd82110cd 100755 --- a/tests/scripts/task_config_build_cortexm.sh +++ b/tests/scripts/task_config_build_cortexm.sh @@ -27,9 +27,11 @@ echo set\(USE_SORT ON\) >> config.cmake echo set\(USE_MICRO ON\) >> config.cmake echo set\(USE_CMSISNN ON\) >> config.cmake echo set\(USE_ETHOSU ON\) >> config.cmake +echo set\(USE_UMA ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_LLVM llvm-config-10\) >> config.cmake echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake + diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index 84213be860dc..9dc5c62efaa7 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -48,4 +48,5 @@ echo set\(USE_VERILATOR ON\) >> config.cmake echo set\(USE_LIBBACKTRACE ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(USE_ETHOSU ON\) >> config.cmake +echo set\(USE_UMA ON\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake diff --git a/tests/scripts/task_python_uma.sh b/tests/scripts/task_python_uma.sh new file mode 100755 index 000000000000..66dd0587af56 --- /dev/null +++ b/tests/scripts/task_python_uma.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software 
Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euxo pipefail + +source tests/scripts/setup-pytest-env.sh + +run_pytest ctypes test_uma tests/python/contrib/test_uma +run_pytest cython3 test_uma tests/python/contrib/test_uma \ No newline at end of file From 9a6e020be71c08abaa3c33cfead8e50a25d4221c Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 6 Jul 2022 11:05:02 +0200 Subject: [PATCH 054/112] Test string arguments --- tests/python/contrib/test_uma/test_target.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index 4f958f15ddd3..0b86b1e11979 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -20,8 +20,20 @@ @pytest.mark.parametrize( "target_name,target_attrs,target_args", - [("my_hwa", {}, {}), - ("my_hwa2", {"local_memory_size": 128*1024}, {"local_memory_size": 256*1024})] + [ + ("my_hwa", {}, {}), + ( + "my_hwa2", + { + "local_memory_size": 128*1024, + "variant": "version1", + }, + { + "local_memory_size": 256*1024, + "variant": "version2" + } + ) + ] ) def test_uma_target(target_name, target_attrs, target_args): registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") From 79577f221a9828acb0e1ab2b17e4c5ef2bab6511 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 6 Jul 2022 11:12:33 +0200 Subject: [PATCH 055/112] Extend target test --- tests/python/contrib/test_uma/test_target.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index 0b86b1e11979..22c24414ff21 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -47,9 +47,7 @@ def test_uma_target(target_name, target_attrs, target_args): for attr in target_attrs.keys(): assert my_target.attrs[attr] == target_attrs[attr] - # Test with parameters overwritten - args = " ".join((F"--{k}={v}" for k,v in target_args.items())) my_target = tvm.target.Target(f"{target_name} {args}") From f48e3f0a217d6c16fe3ad0bacf8338ee6c7e3cd3 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 1 Jul 2022 16:37:24 +0000 Subject: [PATCH 056/112] [UMA] tutorial first versin --- .../tvm/relay/backend/contrib/uma/tutorial.md | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 python/tvm/relay/backend/contrib/uma/tutorial.md diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md new file mode 100644 index 000000000000..e3cc31528773 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/tutorial.md @@ -0,0 +1,146 @@ +Making your hardware accelerator TVM-ready with UMA +============================================= +This tutorial will give you step-by-step guidance how to use UMA to +make your hardware accelerator TVM-ready. +While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only +API to integrate a number of hardware accelerator classes into TVM. + +In this tutorial you will get to know the UMA API in three use cases of increasing complexity. +We call the accelerators in these cases **Vanilla**, **Strawberry** and **Chocolate**. + +Prerequisites +--- + +``` +git clone https://github.com/apache/tvm.git +pip install +``` + +Vanilla +=== +**Vanilla** is a simple accelerator consisting of a MAC array, that can ONLY process Conv2D layers. +All other layers are executed on a CPU, that also orchestrates **Vanilla**. + +For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accepts pointers to input data *if_map*, +*weights* and *result* data, as well as the parameters of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. +```c +int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); +``` + +The script `uma_cli` creates you code skeletons with API-calls into the UMA-API for your accelerator. + +``` +cd tvm/python/tvm/relay/backend/contrib/uma +python uma_cli.py --add-accelerator vanilla_accelerator --template vanilla +``` +The option `--template vanilla` adds all the additional files required for this tutorial. + +``` +$ ls tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator + +backend.py +codegen.py +passes.py +patterns.py +strategies.py +``` + +Step 1: Vanilla backend +--- +This snippet is a full backed for **Vanilla**: +```python +class VanillaAccelerator(UMABackend): + """UMA backend for VanillaAccelerator.""" + + def __init__(self): + super().__init__() + + self._register_pattern("conv2d", conv2d_pattern()) + self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2DPass()) + self._register_codegen(fmt="c", includes=gen_includes) + + @property + def target_name(self): + return "vanilla_accelerator" +``` +It is found in `tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator/backend.py`. + +Step 2: Define offloaded patterns +--- + +To specify that `Conv2D` is offloaded to **Vanilla**, we describe it as Relay dataflow pattern in +`patterns.py` + [[DFPattern]](https://tvm.apache.org/docs/reference/langref/relay_pattern.html) +: +```python +def conv2d_pattern(): + pattern = is_op("nn.conv2d")(wildcard(), wildcard()) + pattern = pattern.has_attr({"strides": [1, 1]}) + return pattern +``` + +To map Conv2D operations from Tensorflow input files to **Vanilla**'s +low level function call, we are using the TIR pass +*VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial). 
+ +Step 3: Modify Codegen +--- +``` +self._register_codegen(fmt="c", includes=gen_includes) +``` + +We tell TVM to create C code using ``fmt="c"`` via +`self._register_codegen`. Since we specified `Conv2D` layers to be called via our +own implementation `vanilla_conv2dnchw(...)`, the TVM generated C code also require an +`#include` statement. + +This is done by providing the include-string like this: +```python +# in vanilla_accelerator/backend.py +self._register_codegen(fmt="c", includes=gen_includes) + +# in vanilla_accelerator/codegen.py +def gen_includes() -> str: + return "#include \"conv2dnchw.cpp\"" +``` + + +Step 4: Build the NN +--- +Now we are going to generate C code for an MNIST-12 NN using. +For this, run `vanilla_accelerator/run.py`. +This creates the directory `build` that contains the generated data in the model library format (MLF). +``` +$cd build/ +$ ls -1 +codegen +lib.tar +metadata.json +parameters +runtime +src +``` +To evaluate the generated C code go to +``` +$ cd codegen/host/src/ +$ ls -1 +default_lib0.c +default_lib1.c +default_lib2.c +default_lib3.c +``` + +Run a + +More +--- +Did this tutorial **not** fit to your accelerator? Please add your requirements to the UMA thread in +the TVM discuss forum: [Link](https://discuss.tvm.apache.org/t/rfc-uma-universal-modular-accelerator-interface/12039). +We are eager to extend this tutorial to provide guidance on making further classes of AI hardware +accelerators TVM-ready using the UMA interface. + +References +--- +[UMA-RFC] [UMA: Universal Modular Accelerator Interface](https://github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md), TVM RFC, June 2022. + +[DFPattern] [Pattern Matching in Relay](https://tvm.apache.org/docs/reference/langref/relay_pattern.html) From e117b7c972e69d3c769de5cca773527b103cd4bc Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Tue, 5 Jul 2022 14:19:03 +0000 Subject: [PATCH 057/112] [UMA] moved unit tests to contrib --- .../test_uma}/test_uma_lowering_with_umalower.py | 8 ++++---- .../{unittest => contrib/test_uma}/test_uma_utils.py | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename tests/python/{unittest => contrib/test_uma}/test_uma_lowering_with_umalower.py (91%) rename tests/python/{unittest => contrib/test_uma}/test_uma_utils.py (100%) diff --git a/tests/python/unittest/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py similarity index 91% rename from tests/python/unittest/test_uma_lowering_with_umalower.py rename to tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index 210528a0c842..2760084eecba 100644 --- a/tests/python/unittest/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -17,9 +17,9 @@ import pytest import tvm -from tests.python.unittest.test_uma_utils import _create_schedule, _generate_io_arrays +from tests.python.contrib.test_uma.test_uma_utils import _create_schedule, _generate_io_arrays from tvm import topi -from tvm.relay.backend.contrib.uma._template.passes import my_ai_hw_conv2d_pass +from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass import tvm.testing from tvm import te from tvm.relay.backend.contrib.uma.api.lower import UMALower @@ -36,7 +36,7 @@ def _conv2d_te_definition(shapes: dict) -> list: def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) - with open("../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp") as f: + with open("../../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp") as f: sch_tir = _create_schedule(placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl) return placeholders, sch_tir @@ -46,7 +46,7 @@ def _run_external_conv2d(dut_io_arrays, conv2d_shapes, target): placeholders, schedule = _pepare_conv2d_schedule(conv2d_shapes) uma_lower = UMALower("lower_test") - uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass())) + uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, MyAiHwConv2dPass())) with tvm.transform.PassContext(): tir_mod = uma_lower._lower_stir_to_nstir(schedule.mod["main"]) diff --git a/tests/python/unittest/test_uma_utils.py b/tests/python/contrib/test_uma/test_uma_utils.py similarity index 100% rename from tests/python/unittest/test_uma_utils.py rename to tests/python/contrib/test_uma/test_uma_utils.py From 5758c7b0436f921d8d10dae8f025d95da6616736 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 6 Jul 2022 09:15:18 +0000 Subject: [PATCH 058/112] [UMA] renaming interfaces --- .../backend/contrib/uma/_template/backend.py | 4 +-- .../contrib/uma/_template/conv2dnchw.cpp | 2 +- .../backend/contrib/uma/_template/passes.py | 4 +-- .../backend/contrib/uma/_template/run.py | 29 +++++++++++++++---- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 8cdd2bd19e4c..3857376b6344 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
"""UMA backend for the my_ai_hw accelerator""" -from .passes import my_ai_hw_conv2d_pass +from .passes import MyAiHwConv2dPass from ..api.utils import PassPhase from ..backend import UMABackend from .codegen import gen_includes, gen_replace_call_extern @@ -41,7 +41,7 @@ def __init__(self): ####################################################################### # Relay to TIR function registration ####################################################################### - self._register_tir_pass(PassPhase.TIR_PHASE_0, my_ai_hw_conv2d_pass()) + self._register_tir_pass(PassPhase.TIR_PHASE_0, MyAiHwConv2dPass()) ####################################################################### diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp index fa3d6a5295d8..b941b5b50b8b 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp @@ -20,7 +20,7 @@ #ifdef __cplusplus extern "C" #endif -int my_hw_ai_conv2dnchw(float* ifmap, float* weights, float* result, +int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw) { int kw_low = kw / 2; diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 4f9f712332b1..8e02a8fbd983 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -22,7 +22,7 @@ @tvm.tir.transform.prim_func_pass(opt_level=2) -class my_ai_hw_conv2d_pass: +class MyAiHwConv2dPass: def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: @@ -34,7 +34,7 @@ def _my_ai_hw_conv2d_pass(func, mod, ctx): _loops = dict() _handles = [] _entry_node = None - _external_function_name = "my_hw_ai_conv2dnchw" + _external_function_name = "my_ai_hw_conv2dnchw" _tvm_block_match_name = "conv2d_nchw" def _has_block(name: str, func) -> bool: diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index ef0f0f1e35fd..0480c73e3647 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -16,7 +16,7 @@ # under the License. 
import tvm -from tvm import relay +from tvm import relay, IRModule from tvm.contrib.download import download_testdata from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend @@ -27,16 +27,31 @@ import onnx -def main(): +def import_mnist12() -> [IRModule, dict]: + model_url = "".join( + ["https://github.com/onnx/models/raw/main/vision/classification/mnist/model/mnist-12.onnx"]) + model_path = download_testdata(model_url, "mnist-12.onnx", module="onnx") + onnx_model = onnx.load(model_path) + input_name = "Input3" + shape_dict = {input_name: (1, 1, 28, 28)} + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) + return mod, params + + +def import_restnet50() -> [IRModule, dict]: model_url = "".join( ["https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet50-v2-7.onnx"]) model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx") - # now you have the onnx model on disk onnx_model = onnx.load(model_path) - input_name = "data" shape_dict = {input_name: (1, 3, 224, 224)} mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) + return mod, params + + +def main(): + + mod, params = import_mnist12() print(mod) @@ -58,13 +73,14 @@ def uma_compiler(ref): "aot", { "workspace-byte-alignment": 8, + #"unpacked-api": True }, ) with tvm.transform.PassContext( opt_level=3, config={"tir.disable_vectorize": True, - "tir.disable_storage_rewrite": True, + #"tir.disable_storage_rewrite": True, "tir.usmp.enable": True, "tir.usmp.algorithm": "greedy_by_conflicts" }, @@ -84,5 +100,8 @@ def uma_compiler(ref): tar_f.extractall(model_library_format_tar_path.parent) + + + if __name__ == "__main__": main() From 688f4c805ab0e1087073cf132a724bea28626a4d Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 6 Jul 2022 12:46:11 +0200 Subject: [PATCH 059/112] Fix umalower_tests in ci --- python/tvm/relay/backend/contrib/uma/_template/codegen.py | 6 ++++-- .../contrib/test_uma/test_uma_lowering_with_umalower.py | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/python/tvm/relay/backend/contrib/uma/_template/codegen.py index d6587b55ea13..6cf33d42616e 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/_template/codegen.py @@ -17,11 +17,13 @@ """UMA codegen for the my_ai_hw accelerator""" import tvm - +import pathlib def gen_includes() -> str: + topdir = pathlib.Path(__file__).parent.absolute() + includes = "" - includes += "#include \"../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp\"" + includes += f"#include \"{topdir}/conv2dnchw.cpp\"" return includes diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index 2760084eecba..aa129d387231 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
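Editor's note: consolidated, the include-generation hook after this fix looks roughly like the sketch below; only the `conv2dnchw.cpp` file name is taken from the patch, everything else is illustrative:

```python
import pathlib

def gen_includes() -> str:
    # Resolve the include relative to this module so the generated C code
    # finds conv2dnchw.cpp no matter where the tests are run from.
    topdir = pathlib.Path(__file__).parent.absolute()
    return f'#include "{topdir}/conv2dnchw.cpp"'

print(gen_includes())
```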
import pytest +import pathlib import tvm from tests.python.contrib.test_uma.test_uma_utils import _create_schedule, _generate_io_arrays @@ -36,7 +37,11 @@ def _conv2d_te_definition(shapes: dict) -> list: def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) - with open("../../../../python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp") as f: + + uma_path = pathlib.Path(str(tvm.relay.backend.contrib.uma.__file__)).parent.absolute() + conv2d_file = uma_path / "_template" / "conv2dnchw.cpp" + + with conv2d_file.open() as f: sch_tir = _create_schedule(placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl) return placeholders, sch_tir From 7551a0e397982393835e264fc645e98db97e9475 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 6 Jul 2022 12:52:27 +0200 Subject: [PATCH 060/112] make uma a python module --- python/tvm/relay/backend/contrib/uma/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/__init__.py diff --git a/python/tvm/relay/backend/contrib/uma/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From 8c4f0654a00c9222bb60ddb0fb6342916f8f5fa2 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 7 Jul 2022 15:57:04 +0000 Subject: [PATCH 061/112] [UMA] Update of UMAv1 API + added testcases + tutorialV1 --- .../contrib/uma/_template/conv2dnchw.cpp | 7 +- .../backend/contrib/uma/_template/run.py | 121 ++++++++---------- .../tvm/relay/backend/contrib/uma/tutorial.md | 86 +++++++++---- .../tvm/relay/backend/contrib/uma/uma_cli.py | 75 +++++++++++ python/tvm/testing/aot.py | 12 +- .../contrib/test_uma/test_uma_pipeline.py | 103 +++++++++++++++ .../test_uma/test_uma_vanilla_accelerator.py | 67 ++++++++++ 7 files changed, 367 insertions(+), 104 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/uma_cli.py create mode 100644 tests/python/contrib/test_uma/test_uma_pipeline.py create mode 100644 tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp index b941b5b50b8b..20035fa2aa9a 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp @@ -16,6 +16,7 @@ # specific language governing permissions and limitations # under the License. */ +#include #ifdef __cplusplus extern "C" @@ -31,9 +32,9 @@ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int padded_iw = iw + 2 * kw_low; int padded_ih = ih + 2 * kh_low; - float* pad_temp = new float[(((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw)]; + float* pad_temp = (float*) malloc((((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); - if (pad_temp == nullptr) { + if (pad_temp == NULL) { return -1; } @@ -64,6 +65,6 @@ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, } } } - delete[] pad_temp; + free(pad_temp); return 0; } \ No newline at end of file diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index 0480c73e3647..a044eb870b7e 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -14,94 +14,75 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. +from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER + +from tvm.testing.aot import compile_and_run, AOTTestModel, AOTTestRunner import tvm -from tvm import relay, IRModule -from tvm.contrib.download import download_testdata +from tvm import relay from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend +from tvm.relay import transform +from collections import OrderedDict import numpy as np import tarfile from pathlib import Path - import onnx - -def import_mnist12() -> [IRModule, dict]: - model_url = "".join( - ["https://github.com/onnx/models/raw/main/vision/classification/mnist/model/mnist-12.onnx"]) - model_path = download_testdata(model_url, "mnist-12.onnx", module="onnx") - onnx_model = onnx.load(model_path) - input_name = "Input3" - shape_dict = {input_name: (1, 1, 28, 28)} - mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) - return mod, params - - -def import_restnet50() -> [IRModule, dict]: - model_url = "".join( - ["https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet50-v2-7.onnx"]) - model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx") - onnx_model = onnx.load(model_path) - input_name = "data" - shape_dict = {input_name: (1, 3, 224, 224)} - mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) - return mod, params +from tvm.testing.aot import ( + AOTTestModel, + AOTTestRunner, + generate_ref_data, + compile_and_run, +) + + +def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): + dtype = "float32" + ishape = (1, 32, 14, 14) + wshape = (32, weight_shape, 3, 3) + pass_config = {"tir.usmp.enable": True} + test_runner = AOTTestRunner( + makefile=test_runner.makefile, + prologue=test_runner.prologue, + epilogue=test_runner.epilogue, + includes=test_runner.includes, + parameters=test_runner.parameters, + pass_config=pass_config, + ) + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=wshape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) + main_f = relay.Function([data0, weight0], out) + mod = tvm.IRModule() + mod["main"] = main_f + mod = transform.InferType()(mod) + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, wshape).astype(dtype) + inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) + output_list = generate_ref_data(mod, inputs) + return mod, inputs, output_list, test_runner def main(): + mod, inputs, output_list, test_runner = create_conv2d() - mod, params = import_mnist12() - - print(mod) - - # Relay target specific partitioning uma_backend = MyAiHwBackend() uma_backend.register() - - @tvm.register_func("relay.ext.my_ai_hw") - def uma_compiler(ref): - print(ref) - mod = uma_backend.partition(mod) - - # Relay build (AOT C target) - TARGET = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) - GENERIC_TARGET = tvm.target.Target("c") - RUNTIME = tvm.relay.backend.Runtime("crt") - EXECUTOR = tvm.relay.backend.Executor( - "aot", - { - "workspace-byte-alignment": 8, - #"unpacked-api": True - }, + target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) + + export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path + print(f"Generated files are in {export_directory}") + compile_and_run( + AOTTestModel(module=mod, inputs=inputs, outputs=output_list), + test_runner, + 
interface_api="c", + use_unpacked_api=True, + target=target, + test_dir=str(export_directory) ) - with tvm.transform.PassContext( - opt_level=3, - config={"tir.disable_vectorize": True, - #"tir.disable_storage_rewrite": True, - "tir.usmp.enable": True, - "tir.usmp.algorithm": "greedy_by_conflicts" - }, - disabled_pass=["AlterOpLayout"] - ): - module = relay.build(mod, target=[GENERIC_TARGET, TARGET], runtime=RUNTIME, executor=EXECUTOR, params=params) - - model_library_format_tar_path = Path("build/lib.tar") - model_library_format_tar_path.unlink(missing_ok=True) - model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) - - tvm.micro.export_model_library_format(module, model_library_format_tar_path) - - print("Built MLF Library: ") - with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: - print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) - tar_f.extractall(model_library_format_tar_path.parent) - - - - if __name__ == "__main__": main() diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md index e3cc31528773..1d7580501126 100644 --- a/python/tvm/relay/backend/contrib/uma/tutorial.md +++ b/python/tvm/relay/backend/contrib/uma/tutorial.md @@ -1,25 +1,26 @@ Making your hardware accelerator TVM-ready with UMA ============================================= + +**Disclaimer**: *This is an early preliminary version of this tutorial. Feel free to aks questions or give feedback via the UMA thread in the TVM +discussion forum [[link](https://discuss.tvm.apache.org/t/rfc-uma-universal-modular-accelerator-interface/12039)].* + + This tutorial will give you step-by-step guidance how to use UMA to make your hardware accelerator TVM-ready. While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only API to integrate a number of hardware accelerator classes into TVM. In this tutorial you will get to know the UMA API in three use cases of increasing complexity. -We call the accelerators in these cases **Vanilla**, **Strawberry** and **Chocolate**. - -Prerequisites ---- +In these use case the three mock-accelerators +**Vanilla**, **Strawberry** and **Chocolate** are introduced and +integrated into TVM using UMA. -``` -git clone https://github.com/apache/tvm.git -pip install -``` Vanilla === -**Vanilla** is a simple accelerator consisting of a MAC array, that can ONLY process Conv2D layers. -All other layers are executed on a CPU, that also orchestrates **Vanilla**. +**Vanilla** is a simple accelerator consisting of a MAC array and has no internal memory. +It is can ONLY process Conv2D layers, all other layers are executed on a CPU, that also orchestrates **Vanilla**. +Both the CPU and Vanilla use a shared memory. For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accepts pointers to input data *if_map*, *weights* and *result* data, as well as the parameters of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. @@ -27,21 +28,24 @@ For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accept int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); ``` -The script `uma_cli` creates you code skeletons with API-calls into the UMA-API for your accelerator. +The script `uma_cli` creates code skeletons with API-calls into the UMA-API for new accelerators. 
+For **Vanilla** we use it like this: ``` cd tvm/python/tvm/relay/backend/contrib/uma -python uma_cli.py --add-accelerator vanilla_accelerator --template vanilla +python uma_cli.py --add-accelerator vanilla_accelerator --tutorial vanilla ``` -The option `--template vanilla` adds all the additional files required for this tutorial. +The option `--tutorial vanilla` adds all the additional files required for this part of the tutorial. ``` $ ls tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator backend.py codegen.py +conv2dnchw.cpp passes.py patterns.py +run.py strategies.py ``` @@ -49,7 +53,7 @@ Step 1: Vanilla backend --- This snippet is a full backed for **Vanilla**: ```python -class VanillaAccelerator(UMABackend): +class VanillaAcceleratorBackend(UMABackend): """UMA backend for VanillaAccelerator.""" def __init__(self): @@ -79,9 +83,10 @@ def conv2d_pattern(): return pattern ``` -To map Conv2D operations from Tensorflow input files to **Vanilla**'s -low level function call, we are using the TIR pass -*VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial). +To map **Conv2D** operations from input graph to **Vanilla**'s +low level function call, TIR pass +*VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial) +is registered in `VanillaAcceleratorBackend`. Step 3: Modify Codegen --- @@ -90,8 +95,8 @@ self._register_codegen(fmt="c", includes=gen_includes) ``` We tell TVM to create C code using ``fmt="c"`` via -`self._register_codegen`. Since we specified `Conv2D` layers to be called via our -own implementation `vanilla_conv2dnchw(...)`, the TVM generated C code also require an +`self._register_codegen`. As `Conv2D` layers should be executed via Vanilla's +C interface `vanilla_conv2dnchw(...)`, the TVM generated C code also require an `#include` statement. This is done by providing the include-string like this: @@ -105,13 +110,26 @@ def gen_includes() -> str: ``` -Step 4: Build the NN +Step 4: Building the Neural Network and run it on Vanilla --- -Now we are going to generate C code for an MNIST-12 NN using. -For this, run `vanilla_accelerator/run.py`. -This creates the directory `build` that contains the generated data in the model library format (MLF). +In this step we generate C code for a single Conv2D layer and run it on +the Vanilla accelerator. +The file `vanilla_accelerator/run.py` provides a demo running a Conv2D layer +making use of Vanilla's C-API. + +By running `vanilla_accelerator/run.py` the output files are generated in the model library format (MLF). 
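Editor's note: the exported archive can also be inspected programmatically; a small sketch, assuming the MLF tar sits in the directory printed by `run.py` (the path below is illustrative):

```python
import tarfile
from pathlib import Path

# Illustrative path; run.py prints the actual export directory.
mlf_tar = Path("build/lib.tar")
with tarfile.open(mlf_tar, "r:*") as tar:
    print("\n".join(member.name for member in tar.getmembers()))
    tar.extractall(mlf_tar.parent)
```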
+ + +Output: ``` -$cd build/ +Generated files are in /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 +``` + +Let's examine the generated files: + +``` +$ cd /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 +$ cd build/ $ ls -1 codegen lib.tar @@ -120,17 +138,31 @@ parameters runtime src ``` -To evaluate the generated C code go to +To evaluate the generated C code go to `codegen/host/src/` ``` $ cd codegen/host/src/ $ ls -1 default_lib0.c default_lib1.c default_lib2.c -default_lib3.c ``` +In `default_lib2.c` you can now see that the generated code calls +into Vanilla's C-API +```c +TVM_DLL int32_t tvmgen_default_vanilla_accelerator_main_0(float* placeholder, float* placeholder1, float* conv2d_nchw, uint8_t* global_workspace_1_var) { + vanilla_accelerator_conv2dnchw(placeholder, placeholder1, conv2d_nchw, 32, 14, 14, 32, 3, 3); + return 0; +} +``` + -Run a +Strawberry +--- +TBD + +Chocolate +--- +TBD More --- diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/python/tvm/relay/backend/contrib/uma/uma_cli.py new file mode 100644 index 000000000000..e5fe6d107799 --- /dev/null +++ b/python/tvm/relay/backend/contrib/uma/uma_cli.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
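Editor's note: the renaming rule applied by the `uma_cli.py` script (whose body follows) can be illustrated in isolation; it relies on the `inflection` package, and the strings here are made up:

```python
from inflection import camelize, underscore

template, new_name = "my_ai_hw", "vanilla_accelerator"
text = "class MyAiHwConv2dPass:  # my_ai_hw pass"
for case in (underscore, camelize):
    text = text.replace(case(template), case(new_name))
print(text)  # class VanillaAcceleratorConv2dPass:  # vanilla_accelerator pass
```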
+ +import argparse +import os +import shutil +from inflection import camelize, underscore + + +def _parse_args(): + parser = argparse.ArgumentParser(description="UMA Interface command line interface") + parser.add_argument( + "--add_hardware", + type=str, + required=True, + ) + parser.add_argument( + "--tutorial", + type=str, + ) + args = parser.parse_args() + return args + + +def replace_template_name(files: list, template_name: str, add_hw_name: str, template_source: str = "_template") -> None: + for f in files: + with open(f) as read_file: + data = read_file.read() + for case in [underscore, camelize]: + data = data.replace(case(template_name), case(add_hw_name)) + data = data.replace(template_source, underscore(add_hw_name)) + with open(f, "w") as write_file: + write_file.write(data) + + +def main(): + args = _parse_args() + add_hw_name = args.add_hardware + add_hw_path = os.path.join(os.getcwd(), add_hw_name) + if os.path.exists(add_hw_path): + raise ValueError(f"Hardware with name {add_hw_name} already exists in UMA file structure") + else: + os.mkdir(add_hw_name) + + uma_template_path = "_template" + uma_files = ["backend.py", "codegen.py", "passes.py", "patterns.py", "run.py", "strategies.py"] + if args.tutorial == "vanilla": + uma_files.append("conv2dnchw.cpp") + + source_files = [os.path.join(uma_template_path, f) for f in uma_files] + destination_files = [os.path.join(add_hw_path, f) for f in uma_files] + + for src, dst in zip(source_files, destination_files): + shutil.copyfile(src, dst) + + template_name = "my_ai_hw" + replace_template_name(destination_files, template_name, add_hw_name) + + +if __name__ == "__main__": + main() diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py index a87e61666d35..d1025bfe600c 100644 --- a/python/tvm/testing/aot.py +++ b/python/tvm/testing/aot.py @@ -837,8 +837,9 @@ def run_and_check_body(base_path): assert AOT_SUCCESS_TOKEN in run_log.read() if test_dir is None: - with tempfile.TemporaryDirectory() as tmpdir: - run_and_check_body(os.path.join(tmpdir, "test")) + tmpdir = utils.tempdir(keep_for_debug=True) + print(f"TEMP DIR {tmpdir.path}") + run_and_check_body(os.path.join(tmpdir.path, "test")) else: run_and_check_body(test_dir) @@ -854,7 +855,7 @@ def compile_and_run( enable_op_fusion: bool = True, data_linkage: AOTDataLinkage = None, use_runtime_executor: bool = True, - target: str = "c", + target: Union[str, tvm.target.Target, List[tvm.target.Target]] = "c", target_opts: Dict = None, test_dir: str = None, verbose: bool = False, @@ -870,6 +871,9 @@ def compile_and_run( Prints commands to build and run AOT test runner """ + if isinstance(target, str): + target = tvm.target.Target(target) + if target_opts: for key, val in target_opts.items(): target += f" {key}={val}" @@ -883,7 +887,7 @@ def compile_and_run( enable_op_fusion=enable_op_fusion, pass_config=runner.pass_config, use_runtime_executor=use_runtime_executor, - target=tvm.target.Target(target), + target=target, schedule_name=schedule_name, ) diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py new file mode 100644 index 000000000000..a3770968bb55 --- /dev/null +++ b/tests/python/contrib/test_uma/test_uma_pipeline.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER +from tvm.relay import transform +from tvm.testing.aot import ( + AOTTestModel, + AOTTestRunner, + generate_ref_data, + compile_and_run, +) + +import tvm +from test_uma_vanilla_accelerator import VanillaAcceleratorBackend +from tvm import relay +import numpy as np +from collections import OrderedDict + + +@pytest.mark.parametrize( + "interface_api,use_unpacked_api,test_runner,groups,weight_shape", + [("c", True, AOT_DEFAULT_RUNNER, 1, 32)], +) +def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): + """Test a subgraph with a single conv2d operator.""" + mod, inputs, output_list, test_runner = create_conv2d(groups, test_runner, weight_shape) + + uma_backend = VanillaAcceleratorBackend() + uma_backend.register() + mod = uma_backend.partition(mod) + target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) + + compile_and_run( + AOTTestModel(module=mod, inputs=inputs, outputs=output_list), + test_runner, + interface_api, + use_unpacked_api, + target=target + ) + + +def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): + dtype = "float32" + ishape = (1, 32, 14, 14) + wshape = (32, weight_shape, 3, 3) + pass_config = {"tir.usmp.enable": True} + test_runner = AOTTestRunner( + makefile=test_runner.makefile, + prologue=test_runner.prologue, + epilogue=test_runner.epilogue, + includes=test_runner.includes, + parameters=test_runner.parameters, + pass_config=pass_config, + ) + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=wshape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) + main_f = relay.Function([data0, weight0], out) + mod = tvm.IRModule() + mod["main"] = main_f + mod = transform.InferType()(mod) + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, wshape).astype(dtype) + inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) + output_list = generate_ref_data(mod, inputs) + return mod, inputs, output_list, test_runner + + +def _generate_runtime_data( + input_shapes: dict, output_shapes: dict +) -> [OrderedDict, OrderedDict]: + assert len(input_shapes) == 1 + assert len(output_shapes) == 1 + + iname = list(input_shapes.keys())[0] + oname = list(output_shapes.keys())[0] + ishape = input_shapes[iname] + oshape = output_shapes[oname] + i_data = np.random.uniform(0, 1, ishape).astype("float32") + o_data = np.random.uniform(0, 1, oshape).astype("float32") + oname = "output" # name set by relay.build in executor_codegen_metadata.outputs + inputs = OrderedDict([(iname, i_data)]) + outputs = OrderedDict([(oname, o_data)]) + return inputs, outputs + + +if __name__ == "__main__": + test_conv2d() diff --git a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py new 
file mode 100644 index 000000000000..418967f2c8f5 --- /dev/null +++ b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""UMA testcase for the vanilla_accelerator accelerator""" +import pytest + +import tvm +from tvm import tir +from tvm.relay.dataflow_pattern import is_op, wildcard +from tvm.relay.backend.contrib.uma.api.utils import PassPhase +from tvm.relay.backend.contrib.uma.backend import UMABackend +from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass as VanillaAcceleratorConv2dPass +from tvm.relay.backend.contrib.uma._template.codegen import gen_includes + +from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern + +# def conv2d_pattern(): +# pattern = is_op("nn.conv2d")(wildcard(), wildcard()) +# pattern = pattern.has_attr({"strides": [1, 1]}) +# return pattern + + +class VanillaAcceleratorBackend(UMABackend): + """UMA backend for the VanillaAccelerator accelerator.""" + + def __init__(self): + super().__init__() + + ####################################################################### + # Target configuration + ####################################################################### + #self._register_target_attr("dimension") + + ####################################################################### + # Relay to Relay function registration + ####################################################################### + self._register_pattern("conv2d", conv2d_pattern()) + + ####################################################################### + # Relay to TIR function registration + ####################################################################### + self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2dPass()) + + + ####################################################################### + # TIR to runtime function registration + ####################################################################### + self._register_codegen( + fmt="c", includes=gen_includes + ) + + @property + def target_name(self): + return "vanilla_accelerator" From 4a0a8c502d873af5ad159fa2f1c57a164931f131 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 13 Jul 2022 17:41:24 +0000 Subject: [PATCH 062/112] [UMA] UMAv1 --- .../relay/backend/contrib/uma/rb_npu/lower.py | 53 ----- .../contrib/uma/ultra_trail/backend.py | 62 ------ .../contrib/uma/ultra_trail/codegen.py | 31 --- .../backend/contrib/uma/ultra_trail/passes.py | 195 ------------------ .../contrib/uma/ultra_trail/patterns.py | 27 --- .../contrib/uma/ultra_trail/strategies.py | 120 ----------- .../contrib/test_ultra_trail_codegen.py | 82 -------- 7 files changed, 570 deletions(-) delete mode 100644 python/tvm/relay/backend/contrib/uma/rb_npu/lower.py delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py delete mode 100644 python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py delete mode 100644 tests/python/contrib/test_ultra_trail_codegen.py diff --git a/python/tvm/relay/backend/contrib/uma/rb_npu/lower.py b/python/tvm/relay/backend/contrib/uma/rb_npu/lower.py deleted file mode 100644 index e6008f79df1d..000000000000 --- a/python/tvm/relay/backend/contrib/uma/rb_npu/lower.py +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Codegen for the RP_NPU""" - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.uma.lower import UMALower - - -class RBNPULower(UMALower): - def __init__(self): - super(RBNPULower, self).__init__() - - def _register_tir_schedules(self): - pass - - def _register_tir_passes(self): - pass - - -@tvm._ffi.register_func("relay.ext.uma.relay_to_tir_func_rb_npu") -def relay_to_tir_func_rb_npu(ext_func: relay.Function) -> tvm.tir.PrimFunc: - """ - This is the hook for python-based lowering of relay function - that gets offloaded to the RB NPU. - - Parameters - ---------- - ext_func : relay.Function - This is the partitioned relay function - - Returns - ------- - prim_func : tir.PrimFunc - This returns the scheduled PrimFunc - """ - codegen = RBNPULower() - prim_func = codegen.relay_to_tir_func(ext_func) - return prim_func diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py deleted file mode 100644 index b53e39ccd6ae..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/backend.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""UMA backend for the UltraTrail accelerator""" - -from ..api.utils import PassPhase -from ..backend import UMABackend -from .codegen import gen_includes, gen_replace_call_extern -from .passes import ConfigGenerator, BufferScopeAnnotator, CodegenGenerateExternCalls -from .patterns import conv1d_relu_pattern -from .strategies import custom_conv1d_strategy - - -class UltraTrailBackend(UMABackend): - """UMA backend for the UltraTrail accelerator.""" - - def __init__(self): - super().__init__() - - ####################################################################### - # Target configuration - ####################################################################### - self._register_target_attr("dimension") - - ####################################################################### - # Relay to Relay function registration - ####################################################################### - self._register_pattern("conv1d_relu", conv1d_relu_pattern()) - - self._register_relay_pass(PassPhase.POST_PARTITIONING_0, ConfigGenerator()) - self._register_relay_pass(PassPhase.POST_PARTITIONING_1, BufferScopeAnnotator()) - - ####################################################################### - # Relay to TIR function registration - ####################################################################### - self._register_operator_strategy("nn.conv1d", custom_conv1d_strategy, plevel=9) - - self._register_tir_pass(PassPhase.TIR_PHASE_0, CodegenGenerateExternCalls()) - - ####################################################################### - # TIR to runtime function registration - ####################################################################### - self._register_codegen( - fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern - ) - - @property - def target_name(self): - return "ultra_trail" diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py deleted file mode 100644 index 1468b7953e59..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/codegen.py +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""UMA codegen for the UltraTrail accelerator""" - -import tvm - - -def gen_includes() -> str: - includes = "" - includes += "#include \n" - includes += "#include \n" - includes += "#include \n" - return includes - - -def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: - return "my_custom_api_function({}, {}, {})".format(*args) diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py deleted file mode 100644 index c6f5bba4a365..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/passes.py +++ /dev/null @@ -1,195 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Transform passes for the UltraTrail accelerator""" - -import tvm -from tvm import relay, tir -from tvm.topi.utils import prod - -from collections import OrderedDict - - -class LayerConfigGenerator(relay.ExprVisitor): - def __init__(self, acc_spec, config): - super(LayerConfigGenerator, self).__init__() - self.acc_spec = acc_spec - self.config = config - - def _extract_config_from_call(self, call): - # Config extraction assumes that the pattern matching already performed all validity checks - if call.op.name == "nn.conv1d": - self.config["ch_in_len"] = int(call.type_args[0].shape[2]) - self.config["ch_in_blk"] = int(call.type_args[0].shape[1]) // self.acc_spec["array_dim"] - self.config["kernel_shape"] = int(call.attrs.kernel_size[0]) - self.config["stride"] = int(call.attrs.strides[0]) - self.config["pad"] = int(call.attrs.padding[0] > 0) - elif call.op.name == "nn.bias_add": - self.config["bias"] = 1 - elif call.op.name == "nn.relu": - self.config["relu"] = 1 - - def visit_call(self, call): - self._extract_config_from_call(call) - for a in call.args: - self.visit(a) - - -class SubgraphConfigGenerator(relay.ExprVisitor): - def __init__(self, acc_spec): - super(SubgraphConfigGenerator, self).__init__() - self.acc_spec = acc_spec - self.config = [] - - def _compute_dataflow_control(self): - # The graph-based execution order and corresponding configuration should be computed here. - # Currently only switches between two feature memories (no parallel pathes). 
- self.config.reverse() - - mem_ctrl = 0x0 - for c in self.config: - c["mem_ctrl"] = mem_ctrl - mem_ctrl = 0x2 if mem_ctrl == 0x0 else 0x0 - - self.config[-1]["last"] = 1 - - def visit_function(self, func): - layer_config = {k: 0 for k in self.acc_spec["conf_reg_layer_bits"].keys()} - layer_config["ch_out_len"] = int(func.ret_type.shape[2]) - layer_config["ch_out_blk"] = int(func.ret_type.shape[1]) // self.acc_spec["array_dim"] - LayerConfigGenerator(self.acc_spec, layer_config).visit(func.body) - self.config.append(layer_config) - - def generate_config(self, func): - self.visit(func.body) - self._compute_dataflow_control() - return self.config - - -@tvm.ir.transform.module_pass(opt_level=1) -class ConfigGenerator: - """This pass generates a configuration string for the UltraTrail accelerator - for each partitioned relay subgraph.""" - - def __init__(self): - # Note: This information should eventually be passed as an accelerator description. - self.acc_spec = { - "array_dim": 8, - "conf_reg_layers": 16, - "conf_reg_layer_bits": OrderedDict( - { - "mem_ctrl": 4, - "ch_in_len": 7, - "ch_in_blk": 4, - "ch_out_len": 7, - "ch_out_blk": 4, - "kernel_shape": 4, - "stride": 3, - "avg_pool_exp": 3, - "pad": 1, - "relu": 1, - "bias": 1, - "avg": 1, - "early_exit": 1, - "last": 1, - } - ), - } - - def _config_to_bitstring(self, config): - bitstring = "" - for layer in config: - for k, v in self.acc_spec["conf_reg_layer_bits"].items(): - bitstring += "{:0{}b}".format(layer[k], v) - return bitstring - - def transform_module( - self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.ir.IRModule: - - # Create UltraTrail specific config based on matched subgraph - for gv, func in mod.functions.items(): - if func.attrs is not None and func.attrs["Compiler"] == "ultra_trail": - config = SubgraphConfigGenerator(self.acc_spec).generate_config(func) - config_bitstring = self._config_to_bitstring(config) - mod.update_func(gv, func.with_attr("ut_config", config_bitstring)) - - return mod - - -@tvm.ir.transform.module_pass(opt_level=1) -class BufferScopeAnnotator: - """This pass annotates the params of an offloaded function with the target UltraTrail memory.""" - - def transform_module( - self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.ir.IRModule: - class FunctionVisitor(relay.ExprVisitor): - def __init__(self): - super(FunctionVisitor, self).__init__() - self._var_scope_map = {} - - def visit_call(self, call): - if call.op.name == "nn.conv1d": - self._var_scope_map[call.args[1]] = "wmem" - elif call.op.name == "nn.bias_add": - self._var_scope_map[call.args[1]] = "bmem" - - for a in call.args: - self.visit(a) - - def var_scope_map(self, func): - self.visit(func.body) - return self._var_scope_map - - # Annotate buffer scopes - for gv, func in mod.functions.items(): - if func.attrs is not None and func.attrs["Compiler"] == "ultra_trail": - var_scope_map = FunctionVisitor().var_scope_map(func) - scopes = [ - var_scope_map[var] if var in var_scope_map else "fmem" for var in func.params - ] - mod.update_func(gv, func.with_attr("ut_buffer_scopes", scopes)) - - return mod - -def insert_extern_calls(sch): - def extern_calls(): - calls = [] - buffer_scopes = list(sch.mod["main"].attrs["relay_attrs"]["ut_buffer_scopes"]) - buffer_scopes.reverse() # for some reason TIR params are reversed to relay function - for i, buffer_scope in enumerate(buffer_scopes): - buffer = sch.mod["main"].buffer_map[sch.mod["main"].params[i]] - size = prod(buffer.shape) - var = buffer.data - call = 
tir.call_extern("int32", f"load_{buffer_scope}", var, size) - calls.append(tir.Evaluate(call)) - seq = tir.stmt_seq(*calls) - return tir.Block([], [], [], "call_extern", seq) - - root_sref = sch.get_sref(sch.get_block("root")) - sch.state.replace(root_sref, extern_calls()) - - return sch - -@tvm.tir.transform.prim_func_pass(opt_level=1) -class CodegenGenerateExternCalls: - def transform_function( - self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - sch = tir.Schedule(func) - sch = insert_extern_calls(sch) - return sch.mod["main"] diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py deleted file mode 100644 index cc9852e9b611..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/patterns.py +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Relay graph patterns for the UltraTrail accelerator""" - -from tvm.relay.dataflow_pattern import is_op, wildcard - - -def conv1d_relu_pattern(): - pattern = is_op("nn.conv1d")(wildcard(), wildcard()) - optional_bias = lambda x: is_op("nn.bias_add")(x, wildcard()) - optional_relu = lambda x: is_op("nn.relu")(x) - pattern = pattern.optional(optional_bias).optional(optional_relu) - return pattern diff --git a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py b/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py deleted file mode 100644 index cd01a8c87132..000000000000 --- a/python/tvm/relay/backend/contrib/uma/ultra_trail/strategies.py +++ /dev/null @@ -1,120 +0,0 @@ -from tvm import relay, te -from tvm.relay.op import op as _op -from tvm.topi.utils import get_const_tuple -from tvm.topi.nn.utils import get_pad_tuple1d -from tvm.relay.op.strategy.generic import wrap_compute_conv1d, wrap_topi_schedule - -import logging - -logger = logging.getLogger("strategy") - -def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): - """1D convolution forward operator for NCW layout. - - Parameters - ---------- - data : tvm.te.Tensor - 3-D with shape [batch, in_channel, in_width] - - kernel : tvm.te.Tensor - 3-D with shape [num_filter, in_channel, filter_size] - - strides : int or tuple - The spatial stride along width - - padding : int, tuple, or str - Padding size can be an integer for equal padding, - a tuple of (left, right) or a string in ['VALID', 'SAME']. - - dilation : int or tuple - Dilation rate if convolution should be dilated. - - out_dtype : str - The output data type. If None then output is same type as input. 
- """ - s = strides - d = dilation - if out_dtype is None: - out_dtype = data.dtype - if isinstance(strides, (tuple, list)): - s = strides[0] - if isinstance(dilation, (tuple, list)): - d = dilation[0] - - batch, in_channels, data_width = data.shape - out_channels, _, kernel_size = kernel.shape - - # Compute padding and out width - pad_left, pad_right = get_pad_tuple1d(padding, (kernel_size,)) - if pad_left != pad_right: - raise ValueError("Padding has to be symmetric. Got %d %d" % pad_left, pad_right) - p = pad_left - out_width = (data_width + 2 * p - kernel_size - (kernel_size - 1) * (d - 1)) // s + 1 - - # Compute graph - rc = te.reduce_axis((0, in_channels), name="rc") - rx = te.reduce_axis((0, kernel_size), name="rx") - return te.compute( - (batch, out_channels, out_width), - lambda nn, kk, xx: te.sum( - te.if_then_else( - te.any(s * xx + d * rx - p < 0, s * xx + d * rx - p >= data_width), - 0.0, - data[nn, rc, s * xx + d * rx - p].astype(out_dtype) - * kernel[kk, rc, rx].astype(out_dtype), - ), - axis=[rc, rx], - ), - tag="custom_conv1d_ncw", - ) - - -# TVM integration: Add schedule to `python/tvm/topi/generic/nn.py` -def schedule_conv1d_ncw(outs): - """Schedule for conv1d_ncw - - Parameters - ---------- - outs: Array of Tensor - The computation graph description of conv1d_ncw - in the format of an array of tensors. - - Returns - ------- - sch: Schedule - The computation schedule for the op. - """ - outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs - s = te.create_schedule([x.op for x in outs]) - nn, kk, xx = s[outs[0]].op.axis - rc, rx = s[outs[0]].op.reduce_axis - - # kk_outer, kk_inner = s[outs[0]].split(kk, 8) - # xx_outer, xx_inner = s[outs[0]].split(xx, 1) - - # s[outs[0]].reorder(kk_outer, xx_outer, kk_inner, xx_inner) - # s[outs[0]].vectorize(xx) - # s[outs[0]].unroll(rc) - - return s - - -# TVM integration: Add strategy to `python/tvm/relay/op/strategy/generic.py` -@relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") -def custom_conv1d_strategy(attrs, inputs, out_type, target): - """custom conv1d generic strategy""" - logger.warning("custom conv1d is not optimized for this platform.") - layout = attrs.data_layout - dilation = get_const_tuple(attrs.dilation) - if dilation[0] < 1: - raise ValueError("dilation should be a positive value") - strategy = _op.OpStrategy() - if layout == "NCW": - strategy.add_implementation( - wrap_compute_conv1d(conv1d_ncw), - wrap_topi_schedule(schedule_conv1d_ncw), - name="custom_conv1d_ncw.generic", - ) - else: - raise ValueError("Unsupported conv1d layout {}".format(layout)) - return strategy diff --git a/tests/python/contrib/test_ultra_trail_codegen.py b/tests/python/contrib/test_ultra_trail_codegen.py deleted file mode 100644 index 8c17021d9a1b..000000000000 --- a/tests/python/contrib/test_ultra_trail_codegen.py +++ /dev/null @@ -1,82 +0,0 @@ -# FIXME: move to proper test case directory - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend - -import pytest -import torch -import tarfile -import tempfile -from pathlib import Path - - -class TorchModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv1d( - 16, 24, 9, bias=True, padding=4, stride=1, dilation=1, groups=1 - ) - self.relu = torch.nn.ReLU() - self.conv2 = torch.nn.Conv1d( - 24, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 - ) - self.relu2 = torch.nn.ReLU() - - def forward(self, x): - x = self.conv(x) - x = self.relu(x) - x = 
self.conv2(x) - x = self.relu2(x) - x = x + 42 - return x - - -# Target Registration -ut_backend = UltraTrailBackend() -ut_backend.register() - -@pytest.mark.parametrize( - "compound_target", - [ - [tvm.target.Target("llvm"), tvm.target.Target("ultra_trail", host=tvm.target.Target("llvm"))], - [tvm.target.Target("c"), tvm.target.Target("ultra_trail", host=tvm.target.Target("c"))], - ] -) -def test_ultra_trail(compound_target): - torch_mod = TorchModel() - # Pytorch frontend - input_shape = (1, 16, 20) - dummy_input = torch.randn(input_shape) - scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() - mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - - # Relay target specific partitioning - mod = ut_backend.partition(mod) - - generic_target = compound_target[0] - - # Relay build (AOT C target) - RUNTIME = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - if str(generic_target.kind) == "llvm": - EXECUTOR = tvm.relay.backend.Executor("graph", {"link-params": True}) - else: - EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) - - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] - ): - module = relay.build(mod, target=compound_target, runtime=RUNTIME, executor=EXECUTOR, params=params) - - with tempfile.TemporaryDirectory() as build_dir: - build_dir = Path(build_dir) - model_library_format_tar_path = build_dir / "lib.tar" - model_library_format_tar_path.unlink(missing_ok=True) - model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) - - tvm.micro.export_model_library_format(module, model_library_format_tar_path) - - print("Built MLF Library: ") - with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: - print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) - tar_f.extractall(model_library_format_tar_path.parent) \ No newline at end of file From 8767b4ad0b78b7b1f7fabed27cb2aa62f87ebdb0 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 13 Jul 2022 17:44:18 +0000 Subject: [PATCH 063/112] [UMA] cmake file updated --- cmake/config.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 0a70f1daa86e..356fd7b4b1b5 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -96,7 +96,7 @@ set(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT OFF) set(USE_OPENGL OFF) # Whether enable MicroTVM runtime -set(USE_MICRO ON) +set(USE_MICRO OFF) # Whether enable RPC runtime set(USE_RPC ON) @@ -133,7 +133,7 @@ set(USE_MICRO_STANDALONE_RUNTIME OFF) # - OFF: disable llvm, note this will disable CPU codegen # which is needed for most cases # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. -set(USE_LLVM ON) +set(USE_LLVM OFF) #--------------------------------------------- # Contrib libraries @@ -382,4 +382,4 @@ set(SUMMARIZE OFF) set(USE_LIBTORCH OFF) # Whether to use the Universal Modular Accelerator Interface -set(USE_UMA ON) +set(USE_UMA OFF) From 4a59b38fe52d5412d8a97b1fa23a16ef76888838 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Thu, 14 Jul 2022 07:24:14 +0000 Subject: [PATCH 064/112] AOT test infrastructure adapted --- python/tvm/testing/aot.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py index d1025bfe600c..78ced79433bf 100644 --- a/python/tvm/testing/aot.py +++ b/python/tvm/testing/aot.py @@ -837,8 +837,7 @@ def run_and_check_body(base_path): assert AOT_SUCCESS_TOKEN in run_log.read() if test_dir is None: - tmpdir = utils.tempdir(keep_for_debug=True) - print(f"TEMP DIR {tmpdir.path}") + tmpdir = utils.tempdir() run_and_check_body(os.path.join(tmpdir.path, "test")) else: run_and_check_body(test_dir) From eb5935c5b6445819b98751dc776a7cd99c8d01bb Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 13 Jul 2022 10:50:00 +0200 Subject: [PATCH 065/112] UMA: add __init__.py for uma.api --- python/tvm/relay/backend/contrib/uma/api/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/tvm/relay/backend/contrib/uma/api/__init__.py diff --git a/python/tvm/relay/backend/contrib/uma/api/__init__.py b/python/tvm/relay/backend/contrib/uma/api/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From d29e2f1249eec31d553b5152262c8cd9b4eb8230 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 13 Jul 2022 13:32:08 +0200 Subject: [PATCH 066/112] Finish uma tests --- .../relay/backend/contrib/uma/api/__init__.py | 23 ++++++++ .../backend/contrib/uma/api/partitioner.py | 17 ++++-- .../tvm/relay/backend/contrib/uma/backend.py | 5 +- .../python/contrib/test_uma/test_partition.py | 57 ++++++++++++++++++- tests/python/contrib/test_uma/test_target.py | 6 +- 5 files changed, 101 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/__init__.py b/python/tvm/relay/backend/contrib/uma/api/__init__.py index e69de29bb2d1..40f48b32d3b1 100644 --- a/python/tvm/relay/backend/contrib/uma/api/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/api/__init__.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .codegen import UMACodegen +from .lower import UMALower +from .partitioner import UMAPartitioner + + +__all__ = ["UMACodegen", "UMALower", "UMAPartitioner"] diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 40014756207b..2d4ef0c20e73 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -16,7 +16,7 @@ # under the License. 
"""Partitioner base class of the Universal Modular Accelerator Interface (UMA)""" -from typing import Dict, List, Tuple, Optional +from typing import Callable, Dict, List, Tuple, Optional import tvm from tvm import relay @@ -25,6 +25,8 @@ from .utils import PassPhase +PatternTable = List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]] + class UMAPartitioner(): """Partitioner base class of the Universal Modular Accelerator Interface (UMA).""" @@ -33,10 +35,17 @@ def __init__(self, target_name: str, merge_compiler_regions: bool = True) -> Non self.merge_compiler_regions = merge_compiler_regions self._relay_passes: List[Tuple[PassPhase, tvm.transform.Pass]] = [] - self._patterns: List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern]] = [] + self._patterns: PatternTable = [] + + def add_pattern(self, name : str, pattern : tvm.relay.dataflow_pattern.DFPattern, predicate : Optional[Callable] = None) -> None: + name = self.target_name + "." + name + if predicate: + self._patterns.append((name, pattern, predicate)) + else: + self._patterns.append((name, pattern)) - def _pattern_table(self): - return [(self.target_name + "." + pattern[0], pattern[1]) for pattern in self._patterns] + def _pattern_table(self) -> PatternTable: + return self._patterns def register(self) -> None: """Register all relevant relay-to-relay functions.""" diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 7079624a6633..d9a04a859690 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -115,6 +115,7 @@ def _register_pattern( self, name: str, pattern: tvm.relay.dataflow_pattern.DFPattern, + predicate: Optional[Callable] = None, ) -> None: """Registers a dataflow pattern that is used to partition the relay graph. @@ -126,6 +127,8 @@ def _register_pattern( pattern: tvm.relay.dataflow_pattern.DFPattern The dataflow pattern. + predicate: Callable Receiving the matched pattern and + Example ------- Here is an example of how two dataflow patterns are registered. @@ -145,7 +148,7 @@ def _register_pattern( optional_relu = lambda x: is_op("nn.relu")(x) conv1d_pattern = conv1d_pattern.optional(optional_bias).optional(optional_relu) """ - self._relay_to_relay._patterns.append((name, pattern)) + self._relay_to_relay.add_pattern(name, pattern, predicate) ############################################################################ # Relay to TIR function registration diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py index 861bf5ff4da2..4e3bf0988d01 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -15,5 +15,60 @@ # specific language governing permissions and limitations # under the License. 
+import pytest -# TODO: cgerum \ No newline at end of file +import tvm + +from tvm.relay.backend.contrib.uma.api import UMAPartitioner +from tvm.relay.op.contrib.register import get_pattern_table +from tvm.relay.testing import resnet, mlp + +def test_partition_table(): + partitioner = UMAPartitioner("test_partition") + assert get_pattern_table("test_partition") is None + + partitioner.register() + + assert get_pattern_table("test_partition") is not None + + + + def conv2d_pattern(): + +@pytest.mark.parametrize( + "workload,backend,merge,expected_partitions", + [ + ("resnet", "dnnl", False, 17), + ("resnet", "dnnl", True, 17), + ("mlp", "dnnl", False, 1), + ("resnet", "cutlass", False, 2), + ("resnet", "cutlass", True, 2), + ("mlp", "cutlass", False, 4), + ("mlp", "cutlass", True, 2), + ] +) +def test_existing_pattern_tables(workload, backend, merge, expected_partitions): + partitioner = UMAPartitioner(backend+"_uma", merge) + pattern_table = get_pattern_table(backend) + + for entry in pattern_table: + partitioner.add_pattern(*entry) + + + if workload == "resnet": + net = resnet.get_net(1, 10) + elif workload == "mlp": + net = mlp.get_net(1, 10) + + + mod = tvm.ir.IRModule() + mod["main"] = net + + partitioner.register() + partitioned_mod = partitioner.partition(mod) + print(partitioned_mod) + + assert len(partitioned_mod.functions) == expected_partitions + +if __name__ == "__main__": + tvm.testing.main() \ No newline at end of file diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index 22c24414ff21..adab01f04910 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -52,4 +52,8 @@ def test_uma_target(target_name, target_attrs, target_args): my_target = tvm.target.Target(f"{target_name} {args}") for attr in target_args.keys(): - assert my_target.attrs[attr] == target_args[attr] \ No newline at end of file + assert my_target.attrs[attr] == target_args[attr] + + +if __name__ == "__main__": + tvm.testing.main() \ No newline at end of file From d038b9f1974c1d1dfcb8d68ddf3dce8c2394fdbe Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Thu, 14 Jul 2022 09:31:53 +0200 Subject: [PATCH 067/112] Use upstream version of dmlc-core --- 3rdparty/dmlc-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 21cc7de0dc9f..09511cf9fe5f 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 21cc7de0dc9fd6acb796e1be6181fa8e6b6c8f41 +Subproject commit 09511cf9fe5ff103900a5eafb50870dc84cc17c8 From a47ebfd60bcc9e5f6a5e8752fde242851c5b1ba2 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 14 Jul 2022 08:28:19 +0000 Subject: [PATCH 068/112] [UMA] tir_to_runtime documentation update --- src/relay/backend/contrib/uma/tir_to_runtime.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index e34bb1e90207..d65e1d84a143 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -48,9 +48,9 @@ class UMACodegen : public codegen::CodeGenCHost { } /*! 
- * \brief Emit code that offloads a subgraph to the Cortex-M + * \brief Emit code that offloads a subgraph to the UMA target * - * \return string of code that offloads a subgraph to the Cortex-M + * \return string of code that offloads a subgraph to the UMA target */ void AddFunction(const PrimFunc& prim_func) { CodeGenC::AddFunction(prim_func); } From 25562b2d6c7b5a0bf87a2e62e37dd15463be7afc Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 14 Jul 2022 08:31:45 +0000 Subject: [PATCH 069/112] [UMA] cleanup --- python/tvm/relay/backend/contrib/uma/run.py | 70 --------------------- 1 file changed, 70 deletions(-) delete mode 100644 python/tvm/relay/backend/contrib/uma/run.py diff --git a/python/tvm/relay/backend/contrib/uma/run.py b/python/tvm/relay/backend/contrib/uma/run.py deleted file mode 100644 index 1071883faeda..000000000000 --- a/python/tvm/relay/backend/contrib/uma/run.py +++ /dev/null @@ -1,70 +0,0 @@ -import torch - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.uma.ultra_trail.backend import UltraTrailBackend - - -import tarfile -from pathlib import Path - - -class TorchModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv1d( - 16, 24, 9, bias=True, padding=4, stride=1, dilation=1, groups=1 - ) - self.relu = torch.nn.ReLU() - self.conv2 = torch.nn.Conv1d( - 24, 24, 9, bias=False, padding=4, stride=1, dilation=1, groups=1 - ) - self.relu2 = torch.nn.ReLU() - - def forward(self, x): - x = self.conv(x) - x = self.relu(x) - x = self.conv2(x) - x = self.relu2(x) - x = x + 42 - return x - - -def main(): - torch_mod = TorchModel() - - # Pytorch frontend - input_shape = (1, 16, 20) - dummy_input = torch.randn(input_shape) - scripted_model = torch.jit.trace(torch_mod, dummy_input).eval() - mod, params = relay.frontend.from_pytorch(scripted_model, [("input_data", input_shape)]) - - # Relay target specific partitioning - ut_backend = UltraTrailBackend() - ut_backend.register() - mod = ut_backend.partition(mod) - - # Relay build (AOT C target) - TARGET = tvm.target.Target("c") - RUNTIME = tvm.relay.backend.Runtime("crt") - EXECUTOR = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) - - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] - ): - module = relay.build(mod, target=TARGET, runtime=RUNTIME, executor=EXECUTOR, params=params) - - model_library_format_tar_path = Path("build/lib.tar") - model_library_format_tar_path.unlink(missing_ok=True) - model_library_format_tar_path.parent.mkdir(parents=True, exist_ok=True) - - tvm.micro.export_model_library_format(module, model_library_format_tar_path) - - print("Built MLF Library: ") - with tarfile.open(model_library_format_tar_path, "r:*") as tar_f: - print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) - tar_f.extractall(model_library_format_tar_path.parent) - - -if __name__ == "__main__": - main() From 0b9f95100a1d337a75781092d9e09eec30e27fb3 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Thu, 14 Jul 2022 08:41:46 +0000 Subject: [PATCH 070/112] [UMA] fix for test_partition --- tests/python/contrib/test_uma/test_partition.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py index 4e3bf0988d01..6922a4fa2c50 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -23,6 +23,7 @@ from tvm.relay.op.contrib.register import get_pattern_table from tvm.relay.testing import resnet, mlp + def test_partition_table(): partitioner = UMAPartitioner("test_partition") assert get_pattern_table("test_partition") is None @@ -31,9 +32,6 @@ def test_partition_table(): assert get_pattern_table("test_partition") is not None - - - def conv2d_pattern(): @pytest.mark.parametrize( "workload,backend,merge,expected_partitions", @@ -45,22 +43,20 @@ def conv2d_pattern(): ("resnet", "cutlass", True, 2), ("mlp", "cutlass", False, 4), ("mlp", "cutlass", True, 2), - ] + ], ) def test_existing_pattern_tables(workload, backend, merge, expected_partitions): - partitioner = UMAPartitioner(backend+"_uma", merge) + partitioner = UMAPartitioner(backend + "_uma", merge) pattern_table = get_pattern_table(backend) for entry in pattern_table: partitioner.add_pattern(*entry) - if workload == "resnet": net = resnet.get_net(1, 10) elif workload == "mlp": net = mlp.get_net(1, 10) - mod = tvm.ir.IRModule() mod["main"] = net @@ -70,5 +66,6 @@ def test_existing_pattern_tables(workload, backend, merge, expected_partitions): assert len(partitioned_mod.functions) == expected_partitions + if __name__ == "__main__": - tvm.testing.main() \ No newline at end of file + tvm.testing.main() From 02079cf02136026b81e6044192fce5646ed67a8a Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 14 Jul 2022 12:04:45 +0000 Subject: [PATCH 071/112] [UMA] lint fix --- .../relay/backend/contrib/uma/api/__init__.py | 2 ++ .../relay/backend/contrib/uma/api/codegen.py | 7 +++-- .../relay/backend/contrib/uma/api/lower.py | 1 + .../backend/contrib/uma/api/partitioner.py | 10 +++++-- .../relay/backend/contrib/uma/api/utils.py | 4 ++- .../tvm/relay/backend/contrib/uma/backend.py | 1 + .../tvm/relay/backend/contrib/uma/uma_cli.py | 7 +++++ tests/python/contrib/test_uma/test_target.py | 22 +++++++--------- .../test_uma_lowering_with_umalower.py | 18 ++++++++++--- .../contrib/test_uma/test_uma_pipeline.py | 6 ++--- .../python/contrib/test_uma/test_uma_utils.py | 26 ++++++++++++++----- .../test_uma/test_uma_vanilla_accelerator.py | 14 +++------- 12 files changed, 77 insertions(+), 41 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/__init__.py b/python/tvm/relay/backend/contrib/uma/api/__init__.py index 40f48b32d3b1..f826a56016fa 100644 --- a/python/tvm/relay/backend/contrib/uma/api/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/api/__init__.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +"""UMA: Universal Modular Accelerator Interface API""" + from .codegen import UMACodegen from .lower import UMALower from .partitioner import UMAPartitioner diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index 92a9b8d647ca..eb3dd4d2b48b 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -16,12 +16,15 @@ # under the License. 
"""Codegen base class of the Universal Modular Accelerator Interface (UMA)""" -import tvm - from typing import Callable +import tvm class UMACodegen(object): + """ + Codegen base class of the Universal Modular Accelerator Interface (UMA) + """ + def __init__(self, target_name: str) -> None: self.target_name = target_name diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 5eeaf7d49daf..67e5995bbddf 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -57,6 +57,7 @@ def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFu The lowered schedulable TensorIR primitive function. """ + def _get_tensors(te_cached_func): outputs = list(te_cached_func.outputs) stack = [] diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 2d4ef0c20e73..9cff751986fe 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -27,7 +27,8 @@ PatternTable = List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]] -class UMAPartitioner(): + +class UMAPartitioner: """Partitioner base class of the Universal Modular Accelerator Interface (UMA).""" def __init__(self, target_name: str, merge_compiler_regions: bool = True) -> None: @@ -37,7 +38,12 @@ def __init__(self, target_name: str, merge_compiler_regions: bool = True) -> Non self._relay_passes: List[Tuple[PassPhase, tvm.transform.Pass]] = [] self._patterns: PatternTable = [] - def add_pattern(self, name : str, pattern : tvm.relay.dataflow_pattern.DFPattern, predicate : Optional[Callable] = None) -> None: + def add_pattern( + self, + name: str, + pattern: tvm.relay.dataflow_pattern.DFPattern, + predicate: Optional[Callable] = None, + ) -> None: name = self.target_name + "." + name if predicate: self._patterns.append((name, pattern, predicate)) diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index 0e4a21b2d214..7c6910881c0c 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -44,7 +44,9 @@ def _c_to_llvm(c_code: str) -> str: return ll_code -def add_llvm_to_block(sch: tvm.tir.Schedule, block_name: str, c_code_str: str = "") -> tvm.tir.Schedule: +def add_llvm_to_block( + sch: tvm.tir.Schedule, block_name: str, c_code_str: str = "" +) -> tvm.tir.Schedule: block = sch.get_block(block_name) loops = sch.get_loops(block) assert len(loops) > 0 diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index d9a04a859690..2beb981262f3 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -27,6 +27,7 @@ class UMABackend(ABC): + """Backend base class of the Universal Modular Accelerator Interface (UMA)""" def __init__(self, merge_compiler_regions: bool = True) -> None: self._target_attrs: Dict = {} self._target_preprocessor: Callable[[str], Dict[str, Any]] = None diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/python/tvm/relay/backend/contrib/uma/uma_cli.py index e5fe6d107799..6f650c235ee8 100644 --- a/python/tvm/relay/backend/contrib/uma/uma_cli.py +++ b/python/tvm/relay/backend/contrib/uma/uma_cli.py @@ -15,6 +15,13 @@ # specific language governing permissions and limitations # under the License. 
+""" + UMA Command Line Interface (CLI) + + Tool to create code skeletons for an easy integration of + new AI hardware accelerators/libraries into TVM using UMA +""" + import argparse import os import shutil diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index adab01f04910..bc6d93877479 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -18,22 +18,20 @@ import pytest import tvm + @pytest.mark.parametrize( "target_name,target_attrs,target_args", [ ("my_hwa", {}, {}), ( - "my_hwa2", + "my_hwa2", { - "local_memory_size": 128*1024, + "local_memory_size": 128 * 1024, "variant": "version1", - }, - { - "local_memory_size": 256*1024, - "variant": "version2" - } - ) - ] + }, + {"local_memory_size": 256 * 1024, "variant": "version2"}, + ), + ], ) def test_uma_target(target_name, target_attrs, target_args): registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") @@ -43,12 +41,12 @@ def test_uma_target(target_name, target_attrs, target_args): my_target = tvm.target.Target(target_name) assert str(my_target.kind) == target_name - + for attr in target_attrs.keys(): assert my_target.attrs[attr] == target_attrs[attr] # Test with parameters overwritten - args = " ".join((F"--{k}={v}" for k,v in target_args.items())) + args = " ".join((f"--{k}={v}" for k, v in target_args.items())) my_target = tvm.target.Target(f"{target_name} {args}") for attr in target_args.keys(): @@ -56,4 +54,4 @@ def test_uma_target(target_name, target_attrs, target_args): if __name__ == "__main__": - tvm.testing.main() \ No newline at end of file + tvm.testing.main() diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index aa129d387231..04e44b8a4a7c 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -28,10 +28,18 @@ def _conv2d_te_definition(shapes: dict) -> list: - n, w, h, ci, kw, kh, co = shapes["n"], shapes["w"], shapes["h"], shapes["ci"], shapes["kw"], shapes["kh"], shapes["co"], + n, w, h, ci, kw, kh, co = ( + shapes["n"], + shapes["w"], + shapes["h"], + shapes["ci"], + shapes["kw"], + shapes["kh"], + shapes["co"], + ) ifmap = te.placeholder((n, ci, w, h), dtype="float32", name="ifmap") weights = te.placeholder((co, ci, kw, kh), dtype="float32", name="weights") - result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=[kw//2, kh//2], dilation=1) + result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=[kw // 2, kh // 2], dilation=1) return [ifmap, weights, result] @@ -42,7 +50,9 @@ def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): conv2d_file = uma_path / "_template" / "conv2dnchw.cpp" with conv2d_file.open() as f: - sch_tir = _create_schedule(placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl) + sch_tir = _create_schedule( + placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl + ) return placeholders, sch_tir @@ -74,6 +84,7 @@ def _prepare_io_arrays(conv2d_shapes, dev): reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result] return dut_io_arrays, reference_io_arrays + @pytest.mark.parametrize( "n, w, h, ci, kw, kh, co", [ @@ -82,7 +93,6 @@ def _prepare_io_arrays(conv2d_shapes, dev): (1, 224, 224, 3, 7, 7, 4), (1, 224, 320, 3, 7, 7, 4), (1, 224, 224, 3, 7, 7, 4), - ], ) def test_lower_with_uma(n, w, h, ci, kw, kh, 
co): diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py index a3770968bb55..5715eab31fbe 100644 --- a/tests/python/contrib/test_uma/test_uma_pipeline.py +++ b/tests/python/contrib/test_uma/test_uma_pipeline.py @@ -50,7 +50,7 @@ def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_sha test_runner, interface_api, use_unpacked_api, - target=target + target=target, ) @@ -81,9 +81,7 @@ def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): return mod, inputs, output_list, test_runner -def _generate_runtime_data( - input_shapes: dict, output_shapes: dict -) -> [OrderedDict, OrderedDict]: +def _generate_runtime_data(input_shapes: dict, output_shapes: dict) -> [OrderedDict, OrderedDict]: assert len(input_shapes) == 1 assert len(output_shapes) == 1 diff --git a/tests/python/contrib/test_uma/test_uma_utils.py b/tests/python/contrib/test_uma/test_uma_utils.py index 15f06d01fa6e..933602806f0e 100644 --- a/tests/python/contrib/test_uma/test_uma_utils.py +++ b/tests/python/contrib/test_uma/test_uma_utils.py @@ -25,9 +25,11 @@ from typing import Union -def _create_schedule(placeholder: list, - c_code: Union[str, io.TextIOWrapper] = "", - use_external_conv2d_impl: bool = True): +def _create_schedule( + placeholder: list, + c_code: Union[str, io.TextIOWrapper] = "", + use_external_conv2d_impl: bool = True, +): # How to do the same with TE # Add pragma TE # s = te.create_schedule(result.op) @@ -45,8 +47,12 @@ def _create_schedule(placeholder: list, else: raise TypeError() - assert use_external_conv2d_impl and c_code_str != "" \ - or not use_external_conv2d_impl and c_code_str == "" + assert ( + use_external_conv2d_impl + and c_code_str != "" + or not use_external_conv2d_impl + and c_code_str == "" + ) def _c_to_llvm(c_code: str) -> str: temp = utils.tempdir() @@ -65,7 +71,15 @@ def _c_to_llvm(c_code: str) -> str: def _generate_io_arrays(shapes: dict, dev): - n, w, h, ci, kw, kh, co = shapes["n"], shapes["w"], shapes["h"], shapes["ci"], shapes["kw"], shapes["kh"], shapes["co"], + n, w, h, ci, kw, kh, co = ( + shapes["n"], + shapes["w"], + shapes["h"], + shapes["ci"], + shapes["kw"], + shapes["kh"], + shapes["co"], + ) ifmap_data = tvm.nd.array(np.random.uniform(size=(n, ci, w, h)).astype("float32"), dev) weight_data = tvm.nd.array(np.random.uniform(size=(co, ci, kh, kw)).astype("float32"), dev) diff --git a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py index 418967f2c8f5..96dc2508d9ab 100644 --- a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py +++ b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py @@ -22,7 +22,9 @@ from tvm.relay.dataflow_pattern import is_op, wildcard from tvm.relay.backend.contrib.uma.api.utils import PassPhase from tvm.relay.backend.contrib.uma.backend import UMABackend -from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass as VanillaAcceleratorConv2dPass +from tvm.relay.backend.contrib.uma._template.passes import ( + MyAiHwConv2dPass as VanillaAcceleratorConv2dPass, +) from tvm.relay.backend.contrib.uma._template.codegen import gen_includes from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern @@ -39,11 +41,6 @@ class VanillaAcceleratorBackend(UMABackend): def __init__(self): super().__init__() - ####################################################################### - # Target configuration - 
####################################################################### - #self._register_target_attr("dimension") - ####################################################################### # Relay to Relay function registration ####################################################################### @@ -54,13 +51,10 @@ def __init__(self): ####################################################################### self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2dPass()) - ####################################################################### # TIR to runtime function registration ####################################################################### - self._register_codegen( - fmt="c", includes=gen_includes - ) + self._register_codegen(fmt="c", includes=gen_includes) @property def target_name(self): From e3467546918ea6e107a09b264612b7ea173adaf4 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 14 Jul 2022 13:11:08 +0000 Subject: [PATCH 072/112] [UMA] lint fix --- .../relay/backend/contrib/uma/api/lower.py | 22 +++++++++---------- .../backend/contrib/uma/api/partitioner.py | 21 ++++++++++++++++-- .../tvm/relay/backend/contrib/uma/backend.py | 7 ++++-- .../tvm/relay/backend/contrib/uma/uma_cli.py | 11 ++++++++-- python/tvm/testing/aot.py | 1 - 5 files changed, 44 insertions(+), 18 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index 67e5995bbddf..f7178bf30580 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -19,9 +19,9 @@ from typing import List, Tuple, Callable, Optional import tvm -from . import _ffi_api from tvm import relay, te from tvm.relay.op.op import register_strategy +from . import _ffi_api from .utils import PassPhase @@ -62,10 +62,10 @@ def _get_tensors(te_cached_func): outputs = list(te_cached_func.outputs) stack = [] visited = set() - for o in outputs: - if o not in visited: - visited.add(o) - stack.append(o) + for output_ in outputs: + if output_ not in visited: + visited.add(output_) + stack.append(output_) args = [] while len(stack) != 0: @@ -74,10 +74,10 @@ def _get_tensors(te_cached_func): args.append(tensor) elif isinstance(tensor.op, tvm.te.tensor.ComputeOp): inputs = tensor.op.input_tensors - for i0 in inputs: - if i0 not in visited: - visited.add(i0) - stack.append(i0) + for input_ in inputs: + if input_ not in visited: + visited.add(input_) + stack.append(input_) return args + outputs @@ -145,11 +145,11 @@ def relay_to_tir(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule: The Relay module with scheduled NPU external functions. 
""" mod = _ffi_api.OutlineCompilerFunctions(self.target_name)(mod) - for gv, func in mod.functions.items(): + for gvar, func in mod.functions.items(): if "Compiler" in func.attrs and func.attrs["Compiler"] == self.target_name: func = self._lower_relay_to_tir(func) func = self._lower_stir_to_nstir(func) - mod.update_func(gv, func) + mod.update_func(gvar, func) return mod def register(self) -> None: diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 9cff751986fe..23c3baa5335c 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -44,6 +44,21 @@ def add_pattern( pattern: tvm.relay.dataflow_pattern.DFPattern, predicate: Optional[Callable] = None, ) -> None: + """Add pattern to UMA partitioner + + Parameters + ---------- + name : str + relay name of pattern + + pattern: tvm.relay.dataflow_pattern.DFPattern + pattern description as DFPattern + + predicate: Optional[Callable] + Optional predicate + + """ + name = self.target_name + "." + name if predicate: self._patterns.append((name, pattern, predicate)) @@ -67,6 +82,8 @@ def partition( mod : tvm.IRModule The relay module to be partitioned. + params: Optional[Dict[str, tvm.runtime.NDArray]] + Returns ------- out : tvm.IRModule @@ -92,8 +109,8 @@ def partition( )(mod) mod = relay.transform.InferType()(mod) # Defunctionalize the partitioned functions to allow lowering - for gv, func in mod.functions.items(): - mod.update_func(gv, relay.transform.Defunctionalization(func, mod)) + for gvar, func in mod.functions.items(): + mod.update_func(gvar, relay.transform.Defunctionalization(func, mod)) mod = tvm.transform.Sequential( [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1] )(mod) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 2beb981262f3..6f6025067ce3 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -28,6 +28,7 @@ class UMABackend(ABC): """Backend base class of the Universal Modular Accelerator Interface (UMA)""" + def __init__(self, merge_compiler_regions: bool = True) -> None: self._target_attrs: Dict = {} self._target_preprocessor: Callable[[str], Dict[str, Any]] = None @@ -128,7 +129,7 @@ def _register_pattern( pattern: tvm.relay.dataflow_pattern.DFPattern The dataflow pattern. - predicate: Callable Receiving the matched pattern and + predicate: Callable Receiving the matched pattern and Example ------- @@ -201,7 +202,9 @@ def custom_conv1d_strategy(attrs, inputs, out_type, target): """ self._relay_to_tir._operator_strategies.append((op, strategy, plevel)) - def _register_tir_pass(self, phase: PassPhase, tir_pass: tvm.tir.transform.PrimFuncPass) -> None: + def _register_tir_pass( + self, phase: PassPhase, tir_pass: tvm.tir.transform.PrimFuncPass + ) -> None: """Registers a TIR pass at the given phase in the lowering process. 
Parameters diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/python/tvm/relay/backend/contrib/uma/uma_cli.py index 6f650c235ee8..c3321eb70f0f 100644 --- a/python/tvm/relay/backend/contrib/uma/uma_cli.py +++ b/python/tvm/relay/backend/contrib/uma/uma_cli.py @@ -25,6 +25,7 @@ import argparse import os import shutil +import sys from inflection import camelize, underscore @@ -43,7 +44,9 @@ def _parse_args(): return args -def replace_template_name(files: list, template_name: str, add_hw_name: str, template_source: str = "_template") -> None: +def replace_template_name( + files: list, template_name: str, add_hw_name: str, template_source: str = "_template" +) -> None: for f in files: with open(f) as read_file: data = read_file.read() @@ -55,11 +58,15 @@ def replace_template_name(files: list, template_name: str, add_hw_name: str, tem def main(): + """ + UMA Command Line Interface (CLI) + """ args = _parse_args() add_hw_name = args.add_hardware add_hw_path = os.path.join(os.getcwd(), add_hw_name) if os.path.exists(add_hw_path): - raise ValueError(f"Hardware with name {add_hw_name} already exists in UMA file structure") + print(f"Hardware with name {add_hw_name} already exists in UMA file structure") + sys.exit(-1) else: os.mkdir(add_hw_name) diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py index 78ced79433bf..46fdae40a88c 100644 --- a/python/tvm/testing/aot.py +++ b/python/tvm/testing/aot.py @@ -23,7 +23,6 @@ import shutil import subprocess import tarfile -import tempfile import logging from typing import Any, NamedTuple, Union, Optional, List, Dict import numpy as np From e9e8d005c77c85faf9e71b4c354e1b435f120e8a Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 14 Jul 2022 13:34:20 +0000 Subject: [PATCH 073/112] [UMA] lint fix --- .../uma/_template/{conv2dnchw.cpp => conv2dnchw.cc} | 0 python/tvm/relay/backend/contrib/uma/api/utils.py | 4 +--- python/tvm/relay/backend/contrib/uma/backend.py | 12 ++++++++---- python/tvm/relay/backend/contrib/uma/tutorial.md | 4 ++-- python/tvm/relay/backend/contrib/uma/uma_cli.py | 2 +- .../test_uma/test_uma_lowering_with_umalower.py | 2 +- 6 files changed, 13 insertions(+), 11 deletions(-) rename python/tvm/relay/backend/contrib/uma/_template/{conv2dnchw.cpp => conv2dnchw.cc} (100%) diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cpp rename to python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index 7c6910881c0c..cef6a5c229d4 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -17,11 +17,9 @@ """Utility methods for the Universal Modular Accelerator Interface (UMA)""" from enum import Enum, auto - -# TODO: naming +import uuid import tvm.tir from tvm.contrib import utils, clang -import uuid class PassPhase(Enum): diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 6f6025067ce3..335c651dcd20 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -171,12 +171,14 @@ def _register_operator_strategy( op: str The name of the operator for which this strategy will be registered. 
- strategy: Callable[[tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], tvm.relay.op.op.OpStrategy] + strategy: Callable[[tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], + tvm.relay.op.op.OpStrategy] The strategy function. plevel: Optional[int] = 11 The priority level of the strategy. Higher plevel equals higher priorization. - The TVM default for topi strategies is 10 so by default new UMA strategies are always used. + The TVM default for topi strategies is 10 so by default new UMA strategies are + always used. Example ------- @@ -261,9 +263,11 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern ) - The C-codegen provides two hooks which allows the user to insert code through the python API. + The C-codegen provides two hooks which allows the user to insert code through + the python API. - `includes` hooks into the include stream and allows insertion of custom includes. - - `replace_call_extern` hooks into the expression visitor and allows the user to insert custom code for a given extern call. + - `replace_call_extern` hooks into the expression visitor and allows the user to insert + custom code for a given extern call. The code generation functions can look like this: diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md index 1d7580501126..842ce97782de 100644 --- a/python/tvm/relay/backend/contrib/uma/tutorial.md +++ b/python/tvm/relay/backend/contrib/uma/tutorial.md @@ -42,7 +42,7 @@ $ ls tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator backend.py codegen.py -conv2dnchw.cpp +conv2dnchw.cc passes.py patterns.py run.py @@ -106,7 +106,7 @@ self._register_codegen(fmt="c", includes=gen_includes) # in vanilla_accelerator/codegen.py def gen_includes() -> str: - return "#include \"conv2dnchw.cpp\"" + return "#include \"conv2dnchw.cc\"" ``` diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/python/tvm/relay/backend/contrib/uma/uma_cli.py index c3321eb70f0f..0d350d173eae 100644 --- a/python/tvm/relay/backend/contrib/uma/uma_cli.py +++ b/python/tvm/relay/backend/contrib/uma/uma_cli.py @@ -73,7 +73,7 @@ def main(): uma_template_path = "_template" uma_files = ["backend.py", "codegen.py", "passes.py", "patterns.py", "run.py", "strategies.py"] if args.tutorial == "vanilla": - uma_files.append("conv2dnchw.cpp") + uma_files.append("conv2dnchw.cc") source_files = [os.path.join(uma_template_path, f) for f in uma_files] destination_files = [os.path.join(add_hw_path, f) for f in uma_files] diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index 04e44b8a4a7c..e57b96eb7bc4 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -47,7 +47,7 @@ def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) uma_path = pathlib.Path(str(tvm.relay.backend.contrib.uma.__file__)).parent.absolute() - conv2d_file = uma_path / "_template" / "conv2dnchw.cpp" + conv2d_file = uma_path / "_template" / "conv2dnchw.cc" with conv2d_file.open() as f: sch_tir = _create_schedule( From 5c4eeae60c4ecbec015092df7be1829607b17614 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Thu, 14 Jul 2022 16:15:59 +0000 Subject: [PATCH 074/112] [UMA] lint fix --- python/tvm/relay/backend/contrib/uma/_template/backend.py | 5 +---- python/tvm/relay/backend/contrib/uma/_template/codegen.py | 5 +++-- python/tvm/relay/backend/contrib/uma/_template/run.py | 2 +- python/tvm/relay/backend/contrib/uma/uma_cli.py | 5 ++++- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 3857376b6344..e1c4e16920d8 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -43,13 +43,10 @@ def __init__(self): ####################################################################### self._register_tir_pass(PassPhase.TIR_PHASE_0, MyAiHwConv2dPass()) - ####################################################################### # TIR to runtime function registration ####################################################################### - self._register_codegen( - fmt="c", includes=gen_includes - ) + self._register_codegen(fmt="c", includes=gen_includes) @property def target_name(self): diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/python/tvm/relay/backend/contrib/uma/_template/codegen.py index 6cf33d42616e..8e33ce9b2089 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/_template/codegen.py @@ -19,11 +19,12 @@ import tvm import pathlib + def gen_includes() -> str: topdir = pathlib.Path(__file__).parent.absolute() - + includes = "" - includes += f"#include \"{topdir}/conv2dnchw.cpp\"" + includes += f'#include "{topdir}/conv2dnchw.cc"' return includes diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index a044eb870b7e..8f696bb96019 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -80,7 +80,7 @@ def main(): interface_api="c", use_unpacked_api=True, target=target, - test_dir=str(export_directory) + test_dir=str(export_directory), ) diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/python/tvm/relay/backend/contrib/uma/uma_cli.py index 0d350d173eae..460781f14035 100644 --- a/python/tvm/relay/backend/contrib/uma/uma_cli.py +++ b/python/tvm/relay/backend/contrib/uma/uma_cli.py @@ -47,6 +47,9 @@ def _parse_args(): def replace_template_name( files: list, template_name: str, add_hw_name: str, template_source: str = "_template" ) -> None: + """ + Replace names in template skeleton code by new name + """ for f in files: with open(f) as read_file: data = read_file.read() @@ -59,7 +62,7 @@ def replace_template_name( def main(): """ - UMA Command Line Interface (CLI) + UMA Command Line Interface (CLI) """ args = _parse_args() add_hw_name = args.add_hardware From 7fda363939ea2e20de6704dd3a26d78ecca5a247 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 15 Jul 2022 12:40:56 +0000 Subject: [PATCH 075/112] [UMA] fix of build scripts for arm and i386 --- tests/scripts/task_config_build_arm.sh | 1 + tests/scripts/task_config_build_i386.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/scripts/task_config_build_arm.sh b/tests/scripts/task_config_build_arm.sh index 189bdc250a8c..a01c1ed6d082 100755 --- a/tests/scripts/task_config_build_arm.sh +++ b/tests/scripts/task_config_build_arm.sh @@ -34,4 +34,5 @@ echo set\(USE_VTA_FSIM ON\) >> config.cmake echo set\(USE_ARM_COMPUTE_LIB ON\) >> config.cmake echo set\(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "/opt/acl"\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake +echo set\(USE_UMA ON\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake diff --git a/tests/scripts/task_config_build_i386.sh b/tests/scripts/task_config_build_i386.sh index c92aed3c1450..20f6c4bf0749 100755 --- a/tests/scripts/task_config_build_i386.sh +++ b/tests/scripts/task_config_build_i386.sh @@ -34,5 +34,6 @@ echo set\(USE_VTA_FSIM ON\) >> config.cmake echo set\(USE_VTA_TSIM ON\) >> config.cmake echo set\(USE_VERILATOR ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake +echo set\(USE_UMA ON\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake From 16306dd421ba63f168e6a5cbe1b71532933003ee Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Thu, 14 Jul 2022 22:30:23 +0200 Subject: [PATCH 076/112] Fix remaining linter errors --- .../tvm/relay/backend/contrib/uma/__init__.py | 16 +++++ .../contrib/uma/_template/conv2dnchw.cc | 22 +++--- .../tvm/relay/backend/contrib/uma/tutorial.md | 17 +++++ src/relay/backend/contrib/uma/relay_to_tir.cc | 7 +- src/relay/backend/contrib/uma/targets.cc | 68 ++++++++++--------- .../backend/contrib/uma/tir_to_runtime.cc | 17 +++-- 6 files changed, 98 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py index e69de29bb2d1..13a83393a912 100644 --- a/python/tvm/relay/backend/contrib/uma/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc index 20035fa2aa9a..18c5855c46ef 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc @@ -21,8 +21,9 @@ #ifdef __cplusplus extern "C" #endif -int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, - int oc, int iw, int ih, int ic, int kh, int kw) { + int + my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, + int kh, int kw) { int kw_low = kw / 2; int kh_low = kh / 2; @@ -32,7 +33,8 @@ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int padded_iw = iw + 2 * kw_low; int padded_ih = ih + 2 * kh_low; - float* pad_temp = (float*) malloc((((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); + float* pad_temp = (float*)malloc( + (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); if (pad_temp == NULL) { return -1; @@ -42,7 +44,9 @@ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, for (int i2 = 0; i2 < padded_ih; ++i2) { for (int i3 = 0; i3 < padded_iw; ++i3) { ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] = - (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) ? ifmap[((((i1 * iw * ih) + ((i2-kh_low) * iw)) + i3 - kw_low) )] : 0.000000e+00f); + (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) + ? ifmap[((((i1 * iw * ih) + ((i2 - kh_low) * iw)) + i3 - kw_low))] + : 0.000000e+00f); } } } @@ -52,13 +56,15 @@ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, for (int i4 = 0; i4 < ic; ++i4) { for (int i5 = 0; i5 < kh; ++i5) { for (int i6 = 0; i6 < kw; ++i6) { - int cse_var_1 = (((i11 * iw*ih) + (i21 * iw)) + i31); + int cse_var_1 = (((i11 * iw * ih) + (i21 * iw)) + i31); if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { result[cse_var_1] = 0.000000e+00f; } - result[cse_var_1] = (result[cse_var_1] - + (((float*)pad_temp)[i4 * padded_iw * padded_ih + (i21+i5) * padded_iw + i31 + i6] - * weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); + result[cse_var_1] = + (result[cse_var_1] + + (((float*) + pad_temp)[i4 * padded_iw * padded_ih + (i21 + i5) * padded_iw + i31 + i6] * + weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); } } } diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md index 842ce97782de..d22263ef9471 100644 --- a/python/tvm/relay/backend/contrib/uma/tutorial.md +++ b/python/tvm/relay/backend/contrib/uma/tutorial.md @@ -1,3 +1,20 @@ + + + + + + + + + + + + + + + + + Making your hardware accelerator TVM-ready with UMA ============================================= diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index 10456ca10539..5bb64663efe8 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -146,8 +146,8 @@ tvm::transform::Pass OutlineCompilerFunctions(const std::string& compiler_name) } return mod; }; - return tvm::transform::CreateModulePass( - pass_func, 0, "relay.backend.contrib.uma.OutlineCompilerFunctions", {}); + return tvm::transform::CreateModulePass(pass_func, 0, + "relay.backend.contrib.uma.OutlineCompilerFunctions", {}); } 
TVM_REGISTER_GLOBAL("relay.ext.uma.OutlineCompilerFunctions") @@ -159,7 +159,8 @@ TVM_REGISTER_GLOBAL("relay.ext.uma.OutlineCompilerFunctions") tvm::transform::Pass RelayToTIR(String target_name) { runtime::TypedPackedFunc pass_func = [=](IRModule ir_module, transform::PassContext pass_context) { - auto relay_to_tir_pf = tvm::runtime::Registry::Get("relay.ext.uma." + target_name + ".relay_to_tir"); + auto relay_to_tir_pf = + tvm::runtime::Registry::Get("relay.ext.uma." + target_name + ".relay_to_tir"); ICHECK(relay_to_tir_pf); ir_module = (*relay_to_tir_pf)(ir_module); return ir_module; diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 6693df5b4ce3..0790315cf06b 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -31,44 +31,50 @@ namespace tvm { namespace relay { namespace contrib { namespace uma { - tvm::transform::Pass RelayToTIR(String target_name); - runtime::Module TIRToRuntime(IRModule mod, Target target); +tvm::transform::Pass RelayToTIR(String target_name); +runtime::Module TIRToRuntime(IRModule mod, Target target); } // namespace uma } // namespace contrib } // namespace relay TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") - .set_body_typed([](String target_name, Map attr_options){ - auto target_kind = ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) - .set_name() - .set_device_type(kDLCPU) - .add_attr_option>("keys") - .add_attr_option("tag") - .add_attr_option("device") - .add_attr_option("model") - .add_attr_option>("libs") - .add_attr_option("host") - .add_attr_option("from_device") - .set_attr("RelayToTIR", relay::contrib::uma::RelayToTIR(target_name)) - .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); + .set_body_typed([](String target_name, Map attr_options) { + auto target_kind = + ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) + .set_name() + .set_device_type(kDLCPU) + .add_attr_option>("keys") + .add_attr_option("tag") + .add_attr_option("device") + .add_attr_option("model") + .add_attr_option>("libs") + .add_attr_option("host") + .add_attr_option("from_device") + .set_attr(tvm::attr::kRelayToTIR, + relay::contrib::uma::RelayToTIR(target_name)) + .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); - for (auto &attr_option : attr_options) { - try { - target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); - continue; - } catch (...) {} - try { - target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); - continue; - } catch (...) {} - try { - target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); - continue; - } catch (...) { - LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() - << " can not be added. Only String, Integer, or Bool are supported."; - } + for (auto& attr_option : attr_options) { + try { + target_kind.add_attr_option(attr_option.first, + Downcast(attr_option.second)); + continue; + } catch (...) { } + try { + target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); + continue; + } catch (...) { + } + try { + target_kind.add_attr_option(attr_option.first, + Downcast(attr_option.second)); + continue; + } catch (...) { + LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() + << " can not be added. 
Only String, Integer, or Bool are supported."; + } + } }); } // namespace tvm diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index d65e1d84a143..474956ae6763 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -35,10 +35,11 @@ namespace uma { class UMACodegen : public codegen::CodeGenCHost { public: - UMACodegen(String target_str) : target_str_(target_str) {} + explicit UMACodegen(String target_str) : target_str_(target_str) {} void Init(bool output_ssa, bool emit_asserts) { - auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_); + auto includes_pf = + tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_); ICHECK(includes_pf); String includes = (*includes_pf)(); decl_stream << includes; @@ -65,13 +66,15 @@ class UMACodegen : public codegen::CodeGenCHost { CodeGenCHost::VisitExpr_(op, os); return; } - auto replace_call_extern_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_replace_call_extern_" + target_str_); + auto replace_call_extern_pf = + tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_replace_call_extern_" + target_str_); if (replace_call_extern_pf == nullptr) { CodeGenCHost::VisitExpr_(op, os); } else { - // TODO: - // - funtion type (void) still gets printed before CallNode if extern call is wrapped in EvaluateNode - // - VarNode arguments might have "wrong" name_hints. The correct variable name is determined in C++ through GetVarID + // - funtion type (void) still gets printed before CallNode if extern call is wrapped in + // EvaluateNode + // - VarNode arguments might have "wrong" name_hints. The correct variable name is determined + // in C++ through GetVarID String api_string = (*replace_call_extern_pf)(op->args); os << api_string; } @@ -82,7 +85,7 @@ class UMACodegen : public codegen::CodeGenCHost { runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; bool emit_asserts = false; - UMACodegen codegen (target->kind->name); + UMACodegen codegen(target->kind->name); Array function_names; codegen.Init(output_ssa, emit_asserts); for (auto kv : mod->functions) { From e8a45ca3a85cdfc2df37358625e764f1023c1d61 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 18 Jul 2022 08:40:43 +0000 Subject: [PATCH 077/112] [UMA] CMakeLists.txt added UMA tvm_option --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b15fa9847e52..7dd061954156 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,7 @@ tvm_option(USE_VITIS_AI "Build with VITIS-AI Codegen support" OFF) tvm_option(SUMMARIZE "Print CMake option summary after configuring" OFF) tvm_option(USE_CLML "Build with CLML Codegen support" OFF) tvm_option(USE_CLML_GRAPH_EXECUTOR "Build with CLML graph runtime" OFF) +tvm_option(USE_UMA "Build with UMA support" OFF) # include directories include_directories(${CMAKE_INCLUDE_PATH}) From 6c2fb047e42815603d2ab5c5040654ff2fc18be2 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Mon, 18 Jul 2022 08:55:28 +0000 Subject: [PATCH 078/112] [UMA] added UMA tvm_option --- cmake/modules/LibInfo.cmake | 1 + src/support/libinfo.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 3b3d8a4bcc9a..6bc8f6b46390 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -116,6 +116,7 @@ function(add_lib_info src_file) TVM_INFO_USE_VULKAN="${USE_VULKAN}" TVM_INFO_USE_CLML="${USE_CLML}" TVM_INFO_USE_CLML_GRAPH_EXECUTOR="${USE_CLML_GRAPH_EXECUTOR}" + TVM_INFO_USE_UMA="${USE_UMA}" ) endfunction() diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index 6f0a6114f3d9..4b2f6034730d 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -317,6 +317,7 @@ TVM_DLL Map GetLibInfo() { {"USE_VULKAN", TVM_INFO_USE_VULKAN}, {"USE_CLML", TVM_INFO_USE_CLML}, {"USE_CLML_GRAPH_EXECUTOR", TVM_INFO_USE_CLML_GRAPH_EXECUTOR}, + {"USE_UMA", TVM_INFO_USE_UMA}, }; return result; } From 646b94ca235471343f9af158f124f21a64ff0363 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 19 Jul 2022 12:00:03 +0000 Subject: [PATCH 079/112] [UMA] guard against multiple registrations --- python/tvm/relay/backend/contrib/uma/backend.py | 8 ++++---- src/relay/backend/contrib/uma/targets.cc | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 335c651dcd20..024964edd54a 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -287,11 +287,11 @@ def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: ############################################################################ def register(self) -> None: registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") - registration_func(self.target_name, self._target_attrs) - self._relay_to_relay.register() - self._relay_to_tir.register() - self._tir_to_runtime.register() + if registration_func(self.target_name, self._target_attrs): + self._relay_to_relay.register() + self._relay_to_tir.register() + self._tir_to_runtime.register() def partition( self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 0790315cf06b..d8a2d40f4c7d 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -38,7 +38,15 @@ runtime::Module TIRToRuntime(IRModule mod, Target target); } // namespace relay TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") - .set_body_typed([](String target_name, Map attr_options) { + .set_body_typed([](String target_name, Map attr_options) -> bool{ + //@todo(cgerum): We probably should get rid of target.register rather sooner than later + // And use a proper registry for uma backends + for(const String registered_target_name : ::tvm::TargetKindRegEntry::ListTargetKinds()){ + if(registered_target_name == target_name){ + return false; + } + } + auto target_kind = ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) .set_name() @@ -75,6 +83,7 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") << " can not be added. Only String, Integer, or Bool are supported."; } } + return true; }); } // namespace tvm From a920007ebfc34fe58554fab7ae5b09ab2aca05eb Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 20 Jul 2022 13:22:18 +0000 Subject: [PATCH 080/112] [UMA] fixed comments as pointed out in PR 12087 --- include/tvm/relay/transform.h | 1 - python/tvm/relay/backend/contrib/uma/_template/backend.py | 2 +- python/tvm/relay/backend/contrib/uma/tutorial.md | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/tvm/relay/transform.h b/include/tvm/relay/transform.h index 063928842a1b..6b26b9eff474 100644 --- a/include/tvm/relay/transform.h +++ b/include/tvm/relay/transform.h @@ -509,7 +509,6 @@ TVM_DLL Pass SimplifyExpr(); * * \param config All available targets. * - * \param config All available targets. * * \return The pass. */ diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index e1c4e16920d8..4f2a50e395eb 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -34,7 +34,7 @@ def __init__(self): self._register_target_attr("dimension") ####################################################################### - # Relay to Relay function registration + # Relay Pattern registration ####################################################################### self._register_pattern("conv2d", conv2d_pattern()) diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md index d22263ef9471..f669dcb62539 100644 --- a/python/tvm/relay/backend/contrib/uma/tutorial.md +++ b/python/tvm/relay/backend/contrib/uma/tutorial.md @@ -50,7 +50,7 @@ For **Vanilla** we use it like this: ``` cd tvm/python/tvm/relay/backend/contrib/uma -python uma_cli.py --add-accelerator vanilla_accelerator --tutorial vanilla +python uma_cli.py --add-hardware vanilla_accelerator --tutorial vanilla ``` The option `--tutorial vanilla` adds all the additional files required for this part of the tutorial. From 6ce6fa09848019532a11396752adf47407ad7bb2 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 21 Jul 2022 09:46:47 +0000 Subject: [PATCH 081/112] [UMA] fixed comments as pointed out in PR 12087 --- cmake/config.cmake | 3 ++ .../backend/contrib/uma/_template/run.py | 34 ++++++++----------- .../tvm/relay/backend/contrib/uma/backend.py | 4 +-- .../test_uma/test_uma_vanilla_accelerator.py | 5 --- 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 356fd7b4b1b5..18725de844b2 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -296,6 +296,9 @@ set(USE_VTA_FPGA OFF) # Whether use Thrust set(USE_THRUST OFF) +# Whether use cuRAND +set(USE_CURAND OFF) + # Whether to build the TensorFlow TVMDSOOp module set(USE_TF_TVMDSOOP OFF) diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/python/tvm/relay/backend/contrib/uma/_template/run.py index 8f696bb96019..978b393af08e 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/python/tvm/relay/backend/contrib/uma/_template/run.py @@ -15,39 +15,33 @@ # specific language governing permissions and limitations # under the License. 
from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER - -from tvm.testing.aot import compile_and_run, AOTTestModel, AOTTestRunner - import tvm from tvm import relay from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend from tvm.relay import transform from collections import OrderedDict - import numpy as np -import tarfile -from pathlib import Path -import onnx + from tvm.testing.aot import ( - AOTTestModel, - AOTTestRunner, + AOTTestModel as AOTModel, + AOTTestRunner as AOTRunner, generate_ref_data, compile_and_run, ) -def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): +def create_conv2d(groups=1, runner=AOT_DEFAULT_RUNNER, weight_shape=32): dtype = "float32" ishape = (1, 32, 14, 14) wshape = (32, weight_shape, 3, 3) pass_config = {"tir.usmp.enable": True} - test_runner = AOTTestRunner( - makefile=test_runner.makefile, - prologue=test_runner.prologue, - epilogue=test_runner.epilogue, - includes=test_runner.includes, - parameters=test_runner.parameters, + runner = AOTRunner( + makefile=runner.makefile, + prologue=runner.prologue, + epilogue=runner.epilogue, + includes=runner.includes, + parameters=runner.parameters, pass_config=pass_config, ) data0 = relay.var("data", shape=ishape, dtype=dtype) @@ -61,11 +55,11 @@ def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): w1_data = np.random.uniform(0, 1, wshape).astype(dtype) inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) output_list = generate_ref_data(mod, inputs) - return mod, inputs, output_list, test_runner + return mod, inputs, output_list, runner def main(): - mod, inputs, output_list, test_runner = create_conv2d() + mod, inputs, output_list, runner = create_conv2d() uma_backend = MyAiHwBackend() uma_backend.register() @@ -75,8 +69,8 @@ def main(): export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path print(f"Generated files are in {export_directory}") compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - test_runner, + AOTModel(module=mod, inputs=inputs, outputs=output_list), + runner, interface_api="c", use_unpacked_api=True, target=target, diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 024964edd54a..363b1de33a3b 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -124,10 +124,10 @@ def _register_pattern( Parameters ---------- name: str - The name of the pattern. + The name of the pattern pattern: tvm.relay.dataflow_pattern.DFPattern - The dataflow pattern. 
+ Relay DFPattern predicate: Callable Receiving the matched pattern and diff --git a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py index 96dc2508d9ab..54dc47bc1a97 100644 --- a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py +++ b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py @@ -29,11 +29,6 @@ from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern -# def conv2d_pattern(): -# pattern = is_op("nn.conv2d")(wildcard(), wildcard()) -# pattern = pattern.has_attr({"strides": [1, 1]}) -# return pattern - class VanillaAcceleratorBackend(UMABackend): """UMA backend for the VanillaAccelerator accelerator.""" From efb6e56e03731f39e891dd44f4a58158665f5966 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Fri, 15 Jul 2022 15:19:20 +0000 Subject: [PATCH 082/112] [UMA] skip uma tests if uma is not available --- python/tvm/relay/backend/contrib/uma/__init__.py | 5 +++++ python/tvm/relay/backend/contrib/uma/api/utils.py | 9 +++++++++ tests/python/contrib/test_uma/test_partition.py | 3 +++ tests/python/contrib/test_uma/test_target.py | 3 +++ .../contrib/test_uma/test_uma_lowering_with_umalower.py | 4 ++++ tests/python/contrib/test_uma/test_uma_pipeline.py | 4 ++++ .../contrib/test_uma/test_uma_vanilla_accelerator.py | 3 +++ 7 files changed, 31 insertions(+) diff --git a/python/tvm/relay/backend/contrib/uma/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py index 13a83393a912..e55210d1b5d6 100644 --- a/python/tvm/relay/backend/contrib/uma/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/__init__.py @@ -14,3 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+ +from .backend import UMABackend +from .api.utils import uma_available + +__all__ = ["UMABackend", "uma_available"] diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index cef6a5c229d4..de011dafa543 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -18,10 +18,19 @@ from enum import Enum, auto import uuid + +import tvm import tvm.tir from tvm.contrib import utils, clang +def uma_available() -> bool: + registration_func = tvm.get_global_func( + "relay.backend.contrib.uma.RegisterTarget", allow_missing=True + ) + return registration_func is not None + + class PassPhase(Enum): """UMA pass phases.""" diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py index 6922a4fa2c50..f35a2ada1e2c 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -22,6 +22,9 @@ from tvm.relay.backend.contrib.uma.api import UMAPartitioner from tvm.relay.op.contrib.register import get_pattern_table from tvm.relay.testing import resnet, mlp +from tvm.relay.backend.contrib.uma import uma_available + +pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") def test_partition_table(): diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index bc6d93877479..bb57e1ae4fa2 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -17,6 +17,9 @@ import pytest import tvm +from tvm.relay.backend.contrib.uma import uma_available + +pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") @pytest.mark.parametrize( diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index e57b96eb7bc4..7109ba01a607 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -25,6 +25,10 @@ from tvm import te from tvm.relay.backend.contrib.uma.api.lower import UMALower from tvm.relay.backend.contrib.uma.api.utils import PassPhase +from tvm.relay.backend.contrib.uma import uma_available + + +pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") def _conv2d_te_definition(shapes: dict) -> list: diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py index 5715eab31fbe..b53dd9c4e894 100644 --- a/tests/python/contrib/test_uma/test_uma_pipeline.py +++ b/tests/python/contrib/test_uma/test_uma_pipeline.py @@ -31,6 +31,10 @@ import numpy as np from collections import OrderedDict +from tvm.relay.backend.contrib.uma import uma_available + +pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") + @pytest.mark.parametrize( "interface_api,use_unpacked_api,test_runner,groups,weight_shape", diff --git a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py index 54dc47bc1a97..c8701f7905db 100644 --- a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py +++ b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py @@ -28,6 +28,9 @@ from tvm.relay.backend.contrib.uma._template.codegen import gen_includes from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern 
+from tvm.relay.backend.contrib.uma import uma_available + +pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") class VanillaAcceleratorBackend(UMABackend): From a6796720ea3016b53ddc33ca311c4803fb2a2afb Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Fri, 22 Jul 2022 09:04:13 +0000 Subject: [PATCH 083/112] [UMA] added UMA rst --- gallery/tutorial/uma.py | 220 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 gallery/tutorial/uma.py diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py new file mode 100644 index 000000000000..985c2ab4fce6 --- /dev/null +++ b/gallery/tutorial/uma.py @@ -0,0 +1,220 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +.. _tutorial-uma: + +Making your hardware accelerator TVM-ready with UMA +==================== +**Author**: `Michael J. Klaiber `_ + +This is an introductory tutorial to the Universal Modular Accelerator Interface (UMA). +UMA provides an easy-to-use API to integrate new hardware accelerators into TVM. + +This tutorial gives you step-by-step guidance how to use UMA to +make your hardware accelerator TVM-ready. +While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only +API to integrate a number of hardware accelerator classes into TVM. + + +In this tutorial you will get to know the UMA API in three use cases of increasing complexity. +In these use case the three mock-accelerators +**Vanilla**, **Strawberry** and **Chocolate** are introduced and +integrated into TVM using UMA. +""" + +# sphinx_gallery_start_ignore +from tvm import testing + +testing.utils.install_request_hook(depth=3) +# sphinx_gallery_end_ignore + +import tvm +from tvm.relay.backend.contrib.uma.backend import UMABackend +from tvm.relay.dataflow_pattern import is_op, wildcard +from tvm.relay.backend.contrib.uma.api.utils import PassPhase +from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass as VanillaAcceleratorConv2DPass + + +###################################################################### +# Vanilla +# ------------- +# **Vanilla** is a simple accelerator consisting of a MAC array and has no internal memory. +# It is can ONLY process Conv2D layers, all other layers are executed on a CPU, that also orchestrates **Vanilla**. +# Both the CPU and Vanilla use a shared memory. +# +# For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accepts pointers to input data *if_map*, +# *weights* and *result* data, as well as the parameters of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. +# ```c +# int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); +# ``` +# .. 
image:: https://raw.githubusercontent.com/apache/tvm-site/main/images/tutorial/overview.png +# :width: 100% +# :alt: A High Level View of TVM +# +# The script `uma_cli` creates code skeletons with API-calls into the UMA-API for new accelerators. +# For **Vanilla** we use it like this: +# +# .. code-block:: bash +# +# cd tvm/python/tvm/relay/backend/contrib/uma +# python uma_cli.py --add_hardware vanilla_accelerator --tutorial vanilla +# + +################################################################################ +# Step 1: Vanilla backend +# ----------------------- +# +class VanillaAcceleratorBackend(UMABackend): + """UMA backend for VanillaAccelerator.""" + + def __init__(self): + super().__init__() + + self._register_pattern("conv2d", conv2d_pattern()) + self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2DPass()) + self._register_codegen(fmt="c", includes=gen_includes) + + @property + def target_name(self): + return "vanilla_accelerator" + +# It is found in `tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator/backend.py`. +# + +################################################################################ +# Step 2: Define offloaded patterns +# ------------------------------------ +# +# To specify that `Conv2D` is offloaded to **Vanilla**, we describe it as Relay dataflow pattern in +# `patterns.py` +# `DFPattern `_ +# + + +def conv2d_pattern(): + pattern = is_op("nn.conv2d")(wildcard(), wildcard()) + pattern = pattern.has_attr({"strides": [1, 1]}) + return pattern + + + +# To map **Conv2D** operations from input graph to **Vanilla**'s +# low level function call, TIR pass +# *VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial) +# is registered in `VanillaAcceleratorBackend`. + + + +# Step 3: Modify Codegen +# --- +# ``` +# self._register_codegen(fmt="c", includes=gen_includes) +# ``` +# +# We tell TVM to create C code using ``fmt="c"`` via +# `self._register_codegen`. As `Conv2D` layers should be executed via Vanilla's +# C interface `vanilla_conv2dnchw(...)`, the TVM generated C code also require an +# `#include` statement. + +# This is done by providing the include-string like this: +# +# .. code-block:: python +# +# # in vanilla_accelerator/backend.py +# self._register_codegen(fmt="c", includes=gen_includes) +# # in vanilla_accelerator/codegen.py +# def gen_includes() -> str: +# return "#include \"conv2dnchw.cc\"" + +########################################################### +# Step 4: Building the Neural Network and run it on Vanilla +# ---------------------------------------------------------- +# +# In this step we generate C code for a single Conv2D layer and run it on +# the Vanilla accelerator. +# The file ``vanilla_accelerator/run.py`` provides a demo running a Conv2D layer +# making use of Vanilla's C-API. +# +# By running ``vanilla_accelerator/run.py`` the output files are generated in the model library format (MLF). +# + +# Output: +# .. code-block:: bash +# +# Generated files are in /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 + + +# Let's examine the generated files: +# Output: +# .. 
code-block:: bash +# +# cd /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 +# cd build/ +# ls -1 +# +# codegen +# lib.tar +# metadata.json +# parameters +# runtime +# src + +# To evaluate the generated C code go to ``codegen/host/src/default_lib2.c`` +# ``` +# $ cd codegen/host/src/ +# $ ls -1 +# default_lib0.c +# default_lib1.c +# default_lib2.c +# ``` +# In `default_lib2.c` you can now see that the generated code calls +# into Vanilla's C-API +# .. code-block:: c +# TVM_DLL int32_t tvmgen_default_vanilla_accelerator_main_0(float* placeholder, float* placeholder1, float* conv2d_nchw, uint8_t* global_workspace_1_var) { +# vanilla_accelerator_conv2dnchw(placeholder, placeholder1, conv2d_nchw, 32, 14, 14, 32, 3, 3); +# return 0; +# } +# + +########################################################### +# Strawberry +# --- +# TBD + +########################################################### +# Chocolate +# --- +# TBD +# + +###################################################################### +# More +# ---- +# Did this tutorial **not** fit to your accelerator? Please add your requirements to the UMA thread in +# the TVM discuss forum: `Link `_. +# We are eager to extend this tutorial to provide guidance on making further classes of AI hardware +# accelerators TVM-ready using the UMA interface. +# + +###################################################################### +# References +# ----------- +# [UMA-RFC] `UMA: Universal Modular Accelerator Interface `_, +# TVM RFC, June 2022. +# +# [DFPattern] `Pattern Matching in Relay `_ +# From 33791fc5b18841e93aedc1647716572575650a55 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Fri, 22 Jul 2022 16:41:12 +0200 Subject: [PATCH 084/112] [UMA] Moved tutorial to RST file in gallery --- docs/conf.py | 1 + gallery/tutorial/uma.py | 220 ++++++++++++------ .../tvm/relay/backend/contrib/uma/tutorial.md | 195 ---------------- 3 files changed, 145 insertions(+), 271 deletions(-) delete mode 100644 python/tvm/relay/backend/contrib/uma/tutorial.md diff --git a/docs/conf.py b/docs/conf.py index 82b0d2962338..d645958ca6db 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -264,6 +264,7 @@ def git_describe_version(original_version): "topi.pi", "cross_compilation_and_rpc.py", "relay_quick_start.py", + "uma.py", ], "compile_models": [ "from_pytorch.py", diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index 985c2ab4fce6..bec124c1947e 100644 --- a/gallery/tutorial/uma.py +++ b/gallery/tutorial/uma.py @@ -17,11 +17,11 @@ """ .. _tutorial-uma: -Making your hardware accelerator TVM-ready with UMA -==================== +Making your Hardware Accelerator TVM-ready with UMA +=================================================== **Author**: `Michael J. Klaiber `_ -This is an introductory tutorial to the Universal Modular Accelerator Interface (UMA). +This is an introductory tutorial to the **Universal Modular Accelerator Interface** (UMA). UMA provides an easy-to-use API to integrate new hardware accelerators into TVM. 
This tutorial gives you step-by-step guidance how to use UMA to @@ -47,6 +47,7 @@ from tvm.relay.dataflow_pattern import is_op, wildcard from tvm.relay.backend.contrib.uma.api.utils import PassPhase from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass as VanillaAcceleratorConv2DPass +from tvm.relay.backend.contrib.uma._template.codegen import gen_includes ###################################################################### @@ -56,28 +57,55 @@ # It is can ONLY process Conv2D layers, all other layers are executed on a CPU, that also orchestrates **Vanilla**. # Both the CPU and Vanilla use a shared memory. # -# For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accepts pointers to input data *if_map*, -# *weights* and *result* data, as well as the parameters of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. -# ```c -# int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); -# ``` -# .. image:: https://raw.githubusercontent.com/apache/tvm-site/main/images/tutorial/overview.png + +###################################################################### +# .. image:: https://raw.githubusercontent.com/apache/tvm-site/main/images/tutorial/uma_vanilla_block_diagram.png # :width: 100% -# :alt: A High Level View of TVM +# :alt: A block diagram of Vanilla # + +###################################################################### +# **Vanilla** has a C interface ``vanilla_conv2dnchw(...)``` for carrying out a Conv2D operation (including same-padding), +# that accepts pointers to input feature map, weights and result, +# as well as the dimensions of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. +# +# .. code-block:: c++ +# +# int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); + + + + +################################################################################ # The script `uma_cli` creates code skeletons with API-calls into the UMA-API for new accelerators. -# For **Vanilla** we use it like this: +# +# For **Vanilla** we use it as follows: (``--tutorial vanilla`` adds all the additional files required for this part of the tutorial) # # .. code-block:: bash # -# cd tvm/python/tvm/relay/backend/contrib/uma -# python uma_cli.py --add_hardware vanilla_accelerator --tutorial vanilla +# cd $TVM_HOME/apps +# python uma_cli.py --add_hardware vanilla_accelerator --tutorial vanilla # ################################################################################ -# Step 1: Vanilla backend -# ----------------------- +# uma_cli.py generates these files in the directory ``vanilla_accelerator`` which we are going to revist. # +# .. code-block:: bash +# +# backend.py +# codegen.py +# conv2dnchw.cc +# passes.py +# patterns.py +# run.py +# strategies.py + + +################################################################################ +# Vanilla backend +# +# The generated backend for vanilla is found in `vanilla_accelerator/backend.py`: + class VanillaAcceleratorBackend(UMABackend): """UMA backend for VanillaAccelerator.""" @@ -92,17 +120,13 @@ def __init__(self): def target_name(self): return "vanilla_accelerator" -# It is found in `tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator/backend.py`. 
-# + ################################################################################ -# Step 2: Define offloaded patterns -# ------------------------------------ -# -# To specify that `Conv2D` is offloaded to **Vanilla**, we describe it as Relay dataflow pattern in -# `patterns.py` -# `DFPattern `_ +# Define offloaded patterns # +# To specify that `Conv2D` is offloaded to **Vanilla**, it is described as Relay dataflow pattern (`DFPattern `_) in +# `vanilla_accelerator/patterns.py` def conv2d_pattern(): @@ -111,100 +135,144 @@ def conv2d_pattern(): return pattern - -# To map **Conv2D** operations from input graph to **Vanilla**'s -# low level function call, TIR pass +################################################################################ +# To map **Conv2D** operations from the input graph to **Vanilla**'s +# low level function call ``vanilla_conv2dnchw(...)``, the TIR pass # *VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial) # is registered in `VanillaAcceleratorBackend`. +################################################################################ +# Codegen -# Step 3: Modify Codegen -# --- -# ``` -# self._register_codegen(fmt="c", includes=gen_includes) -# ``` +################################################################################ +# The file ``vanilla_accelerator/codegen.py`` defines static C-code that is added to the +# resulting C-Code generated by TVMÅ› C-Codegen in ``gen_includes``. +# Here C-code is added to include **Vanilla**'s low level library``vanilla_conv2dnchw()``. # -# We tell TVM to create C code using ``fmt="c"`` via -# `self._register_codegen`. As `Conv2D` layers should be executed via Vanilla's -# C interface `vanilla_conv2dnchw(...)`, the TVM generated C code also require an -# `#include` statement. +# .. code-block:: python +# +# def gen_includes() -> str: +# topdir = pathlib.Path(__file__).parent.absolute() +# +# includes = "" +# includes += f'#include "{topdir}/conv2dnchw.cc"' +# return includes + -# This is done by providing the include-string like this: +################################################################################ +# As shown above in `VanillaAcceleratorBackend` it is registered to UMA with +# the `self._register_codegen` # -# .. code-block:: python +# .. code-block:: python # -# # in vanilla_accelerator/backend.py -# self._register_codegen(fmt="c", includes=gen_includes) -# # in vanilla_accelerator/codegen.py -# def gen_includes() -> str: -# return "#include \"conv2dnchw.cc\"" +# self._register_codegen(fmt="c", includes=gen_includes) + + ########################################################### -# Step 4: Building the Neural Network and run it on Vanilla -# ---------------------------------------------------------- +# Building the Neural Network and run it on Vanilla # -# In this step we generate C code for a single Conv2D layer and run it on +# To demonstrate UMA's functionality, we will generate C code for a single Conv2D layer and run it on # the Vanilla accelerator. # The file ``vanilla_accelerator/run.py`` provides a demo running a Conv2D layer # making use of Vanilla's C-API. 
# +# Excerpt from vanilla_accelerator/run.py: +from tvm.relay.backend.contrib.uma._template.run import create_conv2d +from tvm.testing.aot import AOTTestModel as AOTModel, compile_and_run + +def main(): + mod, inputs, output_list, runner = create_conv2d() + + uma_backend = VanillaAcceleratorBackend() + uma_backend.register() + mod = uma_backend.partition(mod) + target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) + + export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path + print(f"Generated files are in {export_directory}") + compile_and_run( + AOTModel(module=mod, inputs=inputs, outputs=output_list), + runner, + interface_api="c", + use_unpacked_api=True, + target=target, + test_dir=str(export_directory), + ) +main() + +############################################################ # By running ``vanilla_accelerator/run.py`` the output files are generated in the model library format (MLF). # +########################################################### # Output: +# # .. code-block:: bash # -# Generated files are in /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 - +# Generated files are in /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 +########################################################### # Let's examine the generated files: +# +# # Output: +# # .. code-block:: bash # -# cd /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 -# cd build/ -# ls -1 +# cd /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 +# cd build/ +# ls -1 # -# codegen -# lib.tar -# metadata.json -# parameters -# runtime -# src +# codegen +# lib.tar +# metadata.json +# parameters +# runtime +# src +########################################################### # To evaluate the generated C code go to ``codegen/host/src/default_lib2.c`` -# ``` -# $ cd codegen/host/src/ -# $ ls -1 -# default_lib0.c -# default_lib1.c -# default_lib2.c -# ``` +# +# .. code-block:: bash +# +# cd codegen/host/src/ +# ls -1 +# +# default_lib0.c +# default_lib1.c +# default_lib2.c +# + +########################################################### # In `default_lib2.c` you can now see that the generated code calls -# into Vanilla's C-API -# .. code-block:: c -# TVM_DLL int32_t tvmgen_default_vanilla_accelerator_main_0(float* placeholder, float* placeholder1, float* conv2d_nchw, uint8_t* global_workspace_1_var) { -# vanilla_accelerator_conv2dnchw(placeholder, placeholder1, conv2d_nchw, 32, 14, 14, 32, 3, 3); -# return 0; -# } +# into Vanilla's C-API and executes a Conv2D layer: # +# .. code-block:: c++ +# +# TVM_DLL int32_t tvmgen_default_vanilla_accelerator_main_0(float* placeholder, float* placeholder1, float* conv2d_nchw, uint8_t* global_workspace_1_var) { +# vanilla_accelerator_conv2dnchw(placeholder, placeholder1, conv2d_nchw, 32, 14, 14, 32, 3, 3); +# return 0; +# } +# + ########################################################### # Strawberry -# --- -# TBD +# --------------- +# Coming soon ... ########################################################### # Chocolate -# --- -# TBD +# -------------- +# Coming soon ... # ###################################################################### -# More -# ---- -# Did this tutorial **not** fit to your accelerator? Please add your requirements to the UMA thread in +# Request for Community Input +# ----------------------------- +# If this tutorial **did not** fit to your accelerator, lease add your requirements to the UMA thread in # the TVM discuss forum: `Link `_. 
# We are eager to extend this tutorial to provide guidance on making further classes of AI hardware # accelerators TVM-ready using the UMA interface. diff --git a/python/tvm/relay/backend/contrib/uma/tutorial.md b/python/tvm/relay/backend/contrib/uma/tutorial.md deleted file mode 100644 index f669dcb62539..000000000000 --- a/python/tvm/relay/backend/contrib/uma/tutorial.md +++ /dev/null @@ -1,195 +0,0 @@ - - - - - - - - - - - - - - - - - -Making your hardware accelerator TVM-ready with UMA -============================================= - -**Disclaimer**: *This is an early preliminary version of this tutorial. Feel free to aks questions or give feedback via the UMA thread in the TVM -discussion forum [[link](https://discuss.tvm.apache.org/t/rfc-uma-universal-modular-accelerator-interface/12039)].* - - -This tutorial will give you step-by-step guidance how to use UMA to -make your hardware accelerator TVM-ready. -While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only -API to integrate a number of hardware accelerator classes into TVM. - -In this tutorial you will get to know the UMA API in three use cases of increasing complexity. -In these use case the three mock-accelerators -**Vanilla**, **Strawberry** and **Chocolate** are introduced and -integrated into TVM using UMA. - - -Vanilla -=== -**Vanilla** is a simple accelerator consisting of a MAC array and has no internal memory. -It is can ONLY process Conv2D layers, all other layers are executed on a CPU, that also orchestrates **Vanilla**. -Both the CPU and Vanilla use a shared memory. - -For this purpose **Vanilla** has a C interface `vanilla_conv2dnchw`, that accepts pointers to input data *if_map*, -*weights* and *result* data, as well as the parameters of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. -```c -int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); -``` - -The script `uma_cli` creates code skeletons with API-calls into the UMA-API for new accelerators. -For **Vanilla** we use it like this: - -``` -cd tvm/python/tvm/relay/backend/contrib/uma -python uma_cli.py --add-hardware vanilla_accelerator --tutorial vanilla -``` -The option `--tutorial vanilla` adds all the additional files required for this part of the tutorial. - -``` -$ ls tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator - -backend.py -codegen.py -conv2dnchw.cc -passes.py -patterns.py -run.py -strategies.py -``` - -Step 1: Vanilla backend ---- -This snippet is a full backed for **Vanilla**: -```python -class VanillaAcceleratorBackend(UMABackend): - """UMA backend for VanillaAccelerator.""" - - def __init__(self): - super().__init__() - - self._register_pattern("conv2d", conv2d_pattern()) - self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2DPass()) - self._register_codegen(fmt="c", includes=gen_includes) - - @property - def target_name(self): - return "vanilla_accelerator" -``` -It is found in `tvm/python/tvm/relay/backend/contrib/uma/vanilla_accelerator/backend.py`. 
- -Step 2: Define offloaded patterns ---- - -To specify that `Conv2D` is offloaded to **Vanilla**, we describe it as Relay dataflow pattern in -`patterns.py` - [[DFPattern]](https://tvm.apache.org/docs/reference/langref/relay_pattern.html) -: -```python -def conv2d_pattern(): - pattern = is_op("nn.conv2d")(wildcard(), wildcard()) - pattern = pattern.has_attr({"strides": [1, 1]}) - return pattern -``` - -To map **Conv2D** operations from input graph to **Vanilla**'s -low level function call, TIR pass -*VanillaAcceleratorConv2DPass* (that will be discussed later in this tutorial) -is registered in `VanillaAcceleratorBackend`. - -Step 3: Modify Codegen ---- -``` -self._register_codegen(fmt="c", includes=gen_includes) -``` - -We tell TVM to create C code using ``fmt="c"`` via -`self._register_codegen`. As `Conv2D` layers should be executed via Vanilla's -C interface `vanilla_conv2dnchw(...)`, the TVM generated C code also require an -`#include` statement. - -This is done by providing the include-string like this: -```python -# in vanilla_accelerator/backend.py -self._register_codegen(fmt="c", includes=gen_includes) - -# in vanilla_accelerator/codegen.py -def gen_includes() -> str: - return "#include \"conv2dnchw.cc\"" -``` - - -Step 4: Building the Neural Network and run it on Vanilla ---- -In this step we generate C code for a single Conv2D layer and run it on -the Vanilla accelerator. -The file `vanilla_accelerator/run.py` provides a demo running a Conv2D layer -making use of Vanilla's C-API. - -By running `vanilla_accelerator/run.py` the output files are generated in the model library format (MLF). - - -Output: -``` -Generated files are in /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 -``` - -Let's examine the generated files: - -``` -$ cd /tmp/tvm-debug-mode-tempdirs/2022-07-13T13-26-22___x5u76h0p/00000 -$ cd build/ -$ ls -1 -codegen -lib.tar -metadata.json -parameters -runtime -src -``` -To evaluate the generated C code go to `codegen/host/src/` -``` -$ cd codegen/host/src/ -$ ls -1 -default_lib0.c -default_lib1.c -default_lib2.c -``` -In `default_lib2.c` you can now see that the generated code calls -into Vanilla's C-API -```c -TVM_DLL int32_t tvmgen_default_vanilla_accelerator_main_0(float* placeholder, float* placeholder1, float* conv2d_nchw, uint8_t* global_workspace_1_var) { - vanilla_accelerator_conv2dnchw(placeholder, placeholder1, conv2d_nchw, 32, 14, 14, 32, 3, 3); - return 0; -} -``` - - -Strawberry ---- -TBD - -Chocolate ---- -TBD - -More ---- -Did this tutorial **not** fit to your accelerator? Please add your requirements to the UMA thread in -the TVM discuss forum: [Link](https://discuss.tvm.apache.org/t/rfc-uma-universal-modular-accelerator-interface/12039). -We are eager to extend this tutorial to provide guidance on making further classes of AI hardware -accelerators TVM-ready using the UMA interface. - -References ---- -[UMA-RFC] [UMA: Universal Modular Accelerator Interface](https://github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md), TVM RFC, June 2022. - -[DFPattern] [Pattern Matching in Relay](https://tvm.apache.org/docs/reference/langref/relay_pattern.html) From 9c38cd89ff504902a49da1a05664dc991236bc60 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 22 Jul 2022 15:03:04 +0000 Subject: [PATCH 085/112] [UMA] moved uma cli to apps --- {python/tvm/relay/backend/contrib => apps}/uma/uma_cli.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {python/tvm/relay/backend/contrib => apps}/uma/uma_cli.py (100%) diff --git a/python/tvm/relay/backend/contrib/uma/uma_cli.py b/apps/uma/uma_cli.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/uma_cli.py rename to apps/uma/uma_cli.py From a8fa294c5ab8b69d294dc8bb7d4733d70611315c Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 25 Jul 2022 10:11:58 +0200 Subject: [PATCH 086/112] [UMA] change requests according to PR-12087 --- gallery/tutorial/uma.py | 22 ++++++++++--------- .../tvm/relay/backend/contrib/uma/__init__.py | 2 ++ .../backend/contrib/uma/_template/backend.py | 8 ------- .../contrib/uma/_template/conv2dnchw.cc | 17 ++++++++++++++ .../relay/backend/contrib/uma/api/codegen.py | 9 ++++++++ .../tvm/relay/backend/contrib/uma/backend.py | 14 ++---------- 6 files changed, 42 insertions(+), 30 deletions(-) diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index bec124c1947e..74c26efa7362 100644 --- a/gallery/tutorial/uma.py +++ b/gallery/tutorial/uma.py @@ -46,8 +46,10 @@ from tvm.relay.backend.contrib.uma.backend import UMABackend from tvm.relay.dataflow_pattern import is_op, wildcard from tvm.relay.backend.contrib.uma.api.utils import PassPhase -from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass as VanillaAcceleratorConv2DPass -from tvm.relay.backend.contrib.uma._template.codegen import gen_includes +from tvm.relay.backend.contrib.uma._template.passes import ( + MyAiHwConv2dPass as VanillaAcceleratorConv2DPass, +) +from tvm.relay.backend.contrib.uma._template.codegen import gen_includes ###################################################################### @@ -65,7 +67,7 @@ # ###################################################################### -# **Vanilla** has a C interface ``vanilla_conv2dnchw(...)``` for carrying out a Conv2D operation (including same-padding), +# **Vanilla** has a C interface ``vanilla_conv2dnchw(...)``` for carrying out a Conv2D operation (including same-padding), # that accepts pointers to input feature map, weights and result, # as well as the dimensions of `Conv2D`: `oc`, `iw`, `ih`, `ic`, `kh`, `kw`. # @@ -74,8 +76,6 @@ # int vanilla_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw); - - ################################################################################ # The script `uma_cli` creates code skeletons with API-calls into the UMA-API for new accelerators. # @@ -103,9 +103,10 @@ ################################################################################ # Vanilla backend -# +# # The generated backend for vanilla is found in `vanilla_accelerator/backend.py`: + class VanillaAcceleratorBackend(UMABackend): """UMA backend for VanillaAccelerator.""" @@ -121,7 +122,6 @@ def target_name(self): return "vanilla_accelerator" - ################################################################################ # Define offloaded patterns # @@ -164,12 +164,11 @@ def conv2d_pattern(): # As shown above in `VanillaAcceleratorBackend` it is registered to UMA with # the `self._register_codegen` # -# .. code-block:: python +# .. 
code-block:: python # # self._register_codegen(fmt="c", includes=gen_includes) - ########################################################### # Building the Neural Network and run it on Vanilla # @@ -182,6 +181,7 @@ def conv2d_pattern(): from tvm.relay.backend.contrib.uma._template.run import create_conv2d from tvm.testing.aot import AOTTestModel as AOTModel, compile_and_run + def main(): mod, inputs, output_list, runner = create_conv2d() @@ -200,6 +200,8 @@ def main(): target=target, test_dir=str(export_directory), ) + + main() ############################################################ @@ -243,7 +245,7 @@ def main(): # default_lib0.c # default_lib1.c # default_lib2.c -# +# ########################################################### # In `default_lib2.c` you can now see that the generated code calls diff --git a/python/tvm/relay/backend/contrib/uma/__init__.py b/python/tvm/relay/backend/contrib/uma/__init__.py index e55210d1b5d6..061a42e23a87 100644 --- a/python/tvm/relay/backend/contrib/uma/__init__.py +++ b/python/tvm/relay/backend/contrib/uma/__init__.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +"""UMA modules for Relay.""" + from .backend import UMABackend from .api.utils import uma_available diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 4f2a50e395eb..6ceac88b1a84 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -28,24 +28,16 @@ class MyAiHwBackend(UMABackend): def __init__(self): super().__init__() - ####################################################################### # Target configuration - ####################################################################### self._register_target_attr("dimension") - ####################################################################### # Relay Pattern registration - ####################################################################### self._register_pattern("conv2d", conv2d_pattern()) - ####################################################################### # Relay to TIR function registration - ####################################################################### self._register_tir_pass(PassPhase.TIR_PHASE_0, MyAiHwConv2dPass()) - ####################################################################### # TIR to runtime function registration - ####################################################################### self._register_codegen(fmt="c", includes=gen_includes) @property diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc index 18c5855c46ef..7bbf3714ab31 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc @@ -21,6 +21,23 @@ #ifdef __cplusplus extern "C" #endif + + /*! + * \brief Conv2D function for mock-accelerator examples. Limited to same-padded Conv2D with stride (1,1) and datatype float. + * \param ifmap Pointer to input feature map data of size iw*ih*ic*sizeof(float). + * \param weights Pointer to weight data of size kh*kw*ic**oc*sizeof(float). + * \param result Pointer to output feature map data of size iw*ih*oc*sizeof(float). + * \param oc Number of channels of output feature map. + * \param iw Width of input feature map, ifmap. + * \param ih Height of input feature map, ifmap. 
+ * \param ic Number of channels of input feature map. + * \param kh Height of convolution kernels. + * \param kw Wifth of convolution kernels. + * + * \ + * \return error code + * + */ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw) { diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index eb3dd4d2b48b..c5657fbe9280 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -39,6 +39,15 @@ def _register_c_codegen( includes: Callable[[], str] = None, replace_call_extern: Callable[[tvm.ir.container.Array], str] = None, ) -> None: + """Registration of UMA helper functions, e.g. includes and replace_call_extern. + + Parameters + ---------- + includes : Callable[[], str] + user-defined function that adds C-#include statement to UMA C-Code. + replace_call_extern: Callable[[tvm.ir.container.Array], str] + user-definde function that defines how to replace extern call in UMA C-Code. + """ if includes is not None: tvm._ffi.register_func( "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 363b1de33a3b..156f4cfd7251 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -48,9 +48,7 @@ def target_name(self) -> str: """ ... - ############################################################################ # Target configuration - ############################################################################ def _register_target_attr( self, name: str, @@ -77,9 +75,7 @@ def _register_target_attr( """ self._target_attrs[name] = default - ############################################################################ # Relay to Relay function registration - ############################################################################ def _register_relay_pass(self, phase: PassPhase, relay_pass: tvm.transform.Pass) -> None: """Registers a relay pass at the given phase in the lowering process. @@ -129,8 +125,8 @@ def _register_pattern( pattern: tvm.relay.dataflow_pattern.DFPattern Relay DFPattern - predicate: Callable Receiving the matched pattern and - + predicate: Optional[Callable] + Optional predicate for Relay DFPattern Example ------- Here is an example of how two dataflow patterns are registered. @@ -152,9 +148,7 @@ def _register_pattern( """ self._relay_to_relay.add_pattern(name, pattern, predicate) - ############################################################################ # Relay to TIR function registration - ############################################################################ def _register_operator_strategy( self, op: str, @@ -238,9 +232,7 @@ def transform_function(self, func, mod, ctx): """ self._relay_to_tir._tir_passes.append((phase, tir_pass)) - ############################################################################ # TIR to runtime function registration - ############################################################################ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: """Registers a codegen which is used in place of the default C-codegen. 
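# A minimal, illustrative sketch of the ``replace_call_extern`` hook that the
# ``_register_c_codegen`` docstring above describes: a callable receiving the
# argument Array of a matched ``tir.call_extern`` and returning the C call
# string the UMA codegen emits in its place.  The helper name below is
# hypothetical; mapping the TIR arguments to the variable names produced by
# the C codegen is omitted here (as the TODO in tir_to_runtime.cc notes, the
# correct names are determined through GetVarID on the C++ side).
import tvm


def my_replace_call_extern(args: tvm.ir.container.Array) -> str:
    # Join the TIR call arguments into a plain C argument list.
    return "my_ai_hw_conv2dnchw(" + ", ".join(str(arg) for arg in args) + ")"


# It could then be handed to the backend together with the include hook, e.g.:
#
#     self._register_codegen(
#         fmt="c", includes=gen_includes, replace_call_extern=my_replace_call_extern
#     )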
@@ -282,9 +274,7 @@ def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: """ self._tir_to_runtime._register_codegen(fmt, **kwargs) - ############################################################################ # Backend functions - ############################################################################ def register(self) -> None: registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") From afd88f2926de871b1d1c563dc5c67679c5e66050 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 25 Jul 2022 10:55:56 +0200 Subject: [PATCH 087/112] [UMA] update and sync of uma_cli and tutorial --- apps/uma/uma_cli.py | 13 +++++++++---- gallery/tutorial/uma.py | 3 ++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/apps/uma/uma_cli.py b/apps/uma/uma_cli.py index 460781f14035..c123c58ee0c8 100644 --- a/apps/uma/uma_cli.py +++ b/apps/uma/uma_cli.py @@ -26,7 +26,9 @@ import os import shutil import sys +import pathlib from inflection import camelize, underscore +import tvm.relay.backend.contrib.uma._template.backend as template_backend def _parse_args(): @@ -66,14 +68,15 @@ def main(): """ args = _parse_args() add_hw_name = args.add_hardware - add_hw_path = os.path.join(os.getcwd(), add_hw_name) + uma_template_path = pathlib.Path(template_backend.__file__).parent.absolute() + + add_hw_path = os.path.join(uma_template_path.parent, add_hw_name) if os.path.exists(add_hw_path): - print(f"Hardware with name {add_hw_name} already exists in UMA file structure") + print(f"Hardware with name {add_hw_name} already exists in UMA file structure: {add_hw_path}") sys.exit(-1) else: - os.mkdir(add_hw_name) + os.mkdir(add_hw_path) - uma_template_path = "_template" uma_files = ["backend.py", "codegen.py", "passes.py", "patterns.py", "run.py", "strategies.py"] if args.tutorial == "vanilla": uma_files.append("conv2dnchw.cc") @@ -87,6 +90,8 @@ def main(): template_name = "my_ai_hw" replace_template_name(destination_files, template_name, add_hw_name) + print(f"Success: added {add_hw_name} to {add_hw_path}") + if __name__ == "__main__": main() diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index 74c26efa7362..d15d8de71e2b 100644 --- a/gallery/tutorial/uma.py +++ b/gallery/tutorial/uma.py @@ -83,7 +83,8 @@ # # .. code-block:: bash # -# cd $TVM_HOME/apps +# pip install inflection +# cd $TVM_HOME/apps/uma # python uma_cli.py --add_hardware vanilla_accelerator --tutorial vanilla # From ffecd7ac1cf7f9a9ee36fb8e2004fe2f51666d31 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Mon, 25 Jul 2022 15:10:18 +0200 Subject: [PATCH 088/112] [UMA] update of template passe: remove Pad block of Conv2D --- python/tvm/relay/backend/contrib/uma/_template/passes.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 8e02a8fbd983..cff677d166e5 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -67,10 +67,13 @@ def _replace_conv2d(op): external_call = tvm.tir.Evaluate( tir_call(irb, True, _external_function_name, *args) ) - mac_calls = tvm.tir.SeqStmt([external_call]) - irb.emit(mac_calls) + ext_calls = tvm.tir.SeqStmt([external_call]) + irb.emit(ext_calls) irb_result = irb.get() return irb_result + elif isinstance(op, tvm.tir.SeqStmt): + # Remove that pad block of TOPI's conv2DNCHW by only returning the 2nd statement + return op.seq[1] return op sch = tir.Schedule(func) @@ -92,7 +95,7 @@ def _replace_conv2d(op): _loops = {k: sch.get(v) for k, v in loops.items()} _handles = func.buffer_map.items() - x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"]) + x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For", "tir.SeqStmt"]) return func.with_body(x) else: return func From 69ebe5952031f7596430856b93a88723c17e3cf4 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 25 Jul 2022 16:57:15 +0200 Subject: [PATCH 089/112] [UMA] lint updates --- apps/uma/uma_cli.py | 4 +++- python/tvm/relay/backend/contrib/uma/_template/passes.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/uma/uma_cli.py b/apps/uma/uma_cli.py index c123c58ee0c8..abb8a473cbbc 100644 --- a/apps/uma/uma_cli.py +++ b/apps/uma/uma_cli.py @@ -72,7 +72,9 @@ def main(): add_hw_path = os.path.join(uma_template_path.parent, add_hw_name) if os.path.exists(add_hw_path): - print(f"Hardware with name {add_hw_name} already exists in UMA file structure: {add_hw_path}") + print( + f"Hardware with name {add_hw_name} already exists in UMA file structure: {add_hw_path}" + ) sys.exit(-1) else: os.mkdir(add_hw_path) diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index cff677d166e5..6d1db596d907 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -72,7 +72,7 @@ def _replace_conv2d(op): irb_result = irb.get() return irb_result elif isinstance(op, tvm.tir.SeqStmt): - # Remove that pad block of TOPI's conv2DNCHW by only returning the 2nd statement + # Remove that pad block of TOPI's conv2DNCHW by only returning the 2nd statement return op.seq[1] return op @@ -95,7 +95,9 @@ def _replace_conv2d(op): _loops = {k: sch.get(v) for k, v in loops.items()} _handles = func.buffer_map.items() - x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For", "tir.SeqStmt"]) + x = tvm.tir.stmt_functor.ir_transform( + func.body, None, _replace_conv2d, ["tir.For", "tir.SeqStmt"] + ) return func.with_body(x) else: return func From f2480d9270e8a21044108f1f6d2459172ef2dae2 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Tue, 26 Jul 2022 11:50:33 +0200 Subject: [PATCH 090/112] [UMA] Test updates --- .../python/contrib/test_uma/test_uma_lowering_with_umalower.py | 2 +- tests/python/contrib/test_uma/test_uma_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index 7109ba01a607..6578bd6a6c69 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -116,4 +116,4 @@ def test_lower_with_uma(n, w, h, ci, kw, kh, co): if __name__ == "__main__": - test_lower_with_uma(1, 224, 224, 3, 3, 3, 4) + tvm.testing.main() diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py index b53dd9c4e894..0b02982744ae 100644 --- a/tests/python/contrib/test_uma/test_uma_pipeline.py +++ b/tests/python/contrib/test_uma/test_uma_pipeline.py @@ -102,4 +102,4 @@ def _generate_runtime_data(input_shapes: dict, output_shapes: dict) -> [OrderedD if __name__ == "__main__": - test_conv2d() + tvm.testing.main() From 3dab3f770795b50a2b54bd2d601ec229f1e46be6 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 26 Jul 2022 16:45:56 +0200 Subject: [PATCH 091/112] [UMA] fixes according to comments from PR 12087 discussion --- .../backend/contrib/uma/_template/__init__.py | 9 ++++----- .../backend/contrib/uma/_template/conv2dnchw.cc | 8 +++++--- .../backend/contrib/uma/_template/passes.py | 13 +++++++------ .../relay/backend/contrib/uma/api/codegen.py | 17 ++++++++++------- .../tvm/relay/backend/contrib/uma/api/utils.py | 14 +++++++++++++- tests/python/contrib/test_uma/test_partition.py | 2 ++ 6 files changed, 41 insertions(+), 22 deletions(-) rename tests/scripts/task_python_uma.sh => python/tvm/relay/backend/contrib/uma/_template/__init__.py (80%) mode change 100755 => 100644 diff --git a/tests/scripts/task_python_uma.sh b/python/tvm/relay/backend/contrib/uma/_template/__init__.py old mode 100755 new mode 100644 similarity index 80% rename from tests/scripts/task_python_uma.sh rename to python/tvm/relay/backend/contrib/uma/_template/__init__.py index 66dd0587af56..500eb327796e --- a/tests/scripts/task_python_uma.sh +++ b/python/tvm/relay/backend/contrib/uma/_template/__init__.py @@ -1,4 +1,3 @@ -#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,10 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +""" -set -euxo pipefail +Template files for UMA tutorial -source tests/scripts/setup-pytest-env.sh +Do not import -run_pytest ctypes test_uma tests/python/contrib/test_uma -run_pytest cython3 test_uma tests/python/contrib/test_uma \ No newline at end of file +""" diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc index 7bbf3714ab31..047bde107112 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc +++ b/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc @@ -32,9 +32,8 @@ extern "C" * \param ih Height of input feature map, ifmap. * \param ic Number of channels of input feature map. * \param kh Height of convolution kernels. 
- * \param kw Wifth of convolution kernels. + * \param kw Width of convolution kernels. * - * \ * \return error code * */ @@ -50,6 +49,7 @@ extern "C" int padded_iw = iw + 2 * kw_low; int padded_ih = ih + 2 * kh_low; + // This is only example code. A real hardware accelerator would call a device specific malloc function. float* pad_temp = (float*)malloc( (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); @@ -88,6 +88,8 @@ extern "C" } } } + + // This is only example code. A real hardware accelerator would call a device specific free function. free(pad_temp); return 0; -} \ No newline at end of file +} diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 6d1db596d907..64c98130a3b1 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -23,6 +23,9 @@ @tvm.tir.transform.prim_func_pass(opt_level=2) class MyAiHwConv2dPass: + _EXTERNAL_FUNCTION_NAME = "my_ai_hw_conv2dnchw" + _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" + def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: @@ -34,10 +37,8 @@ def _my_ai_hw_conv2d_pass(func, mod, ctx): _loops = dict() _handles = [] _entry_node = None - _external_function_name = "my_ai_hw_conv2dnchw" - _tvm_block_match_name = "conv2d_nchw" - def _has_block(name: str, func) -> bool: + def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool: """ Determine of a tir.block with `name` exists in `func` """ @@ -65,7 +66,7 @@ def _replace_conv2d(op): offsets = [_loops[i].extent.value for i in offset_order] args = buffers + offsets external_call = tvm.tir.Evaluate( - tir_call(irb, True, _external_function_name, *args) + tir_call(irb, True, MyAiHwConv2dPass._EXTERNAL_FUNCTION_NAME, *args) ) ext_calls = tvm.tir.SeqStmt([external_call]) irb.emit(ext_calls) @@ -78,8 +79,8 @@ def _replace_conv2d(op): sch = tir.Schedule(func) - if _has_block(_tvm_block_match_name, func): - conv2d_block = sch.get_block(_tvm_block_match_name) + if _has_block(_TVM_BLOCK_MATCH_NAME, func): + conv2d_block = sch.get_block(_TVM_BLOCK_MATCH_NAME) rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 loops = dict( diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index c5657fbe9280..c044ef23cbda 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -16,7 +16,7 @@ # under the License. """Codegen base class of the Universal Modular Accelerator Interface (UMA)""" -from typing import Callable +from typing import Callable, Optional import tvm @@ -36,26 +36,29 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: def _register_c_codegen( self, - includes: Callable[[], str] = None, - replace_call_extern: Callable[[tvm.ir.container.Array], str] = None, + includes: Optional[Callable[[], str]] = None, + replace_call_extern: Optional[Callable[[tvm.ir.container.Array], str]] = None, ) -> None: """Registration of UMA helper functions, e.g. includes and replace_call_extern. Parameters ---------- - includes : Callable[[], str] + includes : OptionalCallable[[], str]] user-defined function that adds C-#include statement to UMA C-Code. 
- replace_call_extern: Callable[[tvm.ir.container.Array], str] + replace_call_extern: OptionalCallable[[tvm.ir.container.Array], str]] user-definde function that defines how to replace extern call in UMA C-Code. """ if includes is not None: tvm._ffi.register_func( - "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes + f"relay.ext.uma.codegen_c_includes_{self.target_name}", + includes, + override=True, ) if replace_call_extern is not None: tvm._ffi.register_func( - "relay.ext.uma.codegen_c_replace_call_extern_{}".format(self.target_name), + f"relay.ext.uma.codegen_c_replace_call_extern_{self.target_name}", replace_call_extern, + override=True, ) def register(self) -> None: diff --git a/python/tvm/relay/backend/contrib/uma/api/utils.py b/python/tvm/relay/backend/contrib/uma/api/utils.py index de011dafa543..e217fbf3d6ad 100644 --- a/python/tvm/relay/backend/contrib/uma/api/utils.py +++ b/python/tvm/relay/backend/contrib/uma/api/utils.py @@ -32,7 +32,19 @@ def uma_available() -> bool: class PassPhase(Enum): - """UMA pass phases.""" + """ + UMA pass phases: + + PRE_PARTITIONING: prior to UMA partitioning + POST_PARTITIONING_0: after UMA partitioning, before Defunctionalization + POST_PARTITIONING_1: after UMA partitioning and after Defunctionalization + TIR_PHASE_0: Generates the raw IR and loop levels. + TIR_PHASE_1: Flattens the array storage. + TIR_PHASE_2: Transforms loops, like unroll, vectorization and thread-binding. + TIR_PHASE_3: Does some cleanup work. + + Reference to TIR phases: src/driver/driver_api.c + """ PRE_PARTITIONING = auto() POST_PARTITIONING_0 = auto() diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py index f35a2ada1e2c..217f23083caf 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -59,6 +59,8 @@ def test_existing_pattern_tables(workload, backend, merge, expected_partitions): net = resnet.get_net(1, 10) elif workload == "mlp": net = mlp.get_net(1, 10) + else: + assert False, f"don't know how to find workload for {workload}" mod = tvm.ir.IRModule() mod["main"] = net From fa02920cfb4fc08381f82f06e33232afdbd97931 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 27 Jul 2022 11:27:14 +0200 Subject: [PATCH 092/112] [UMA] lint updates --- .../backend/contrib/uma/_template/backend.py | 2 +- .../backend/contrib/uma/_template/passes.py | 19 +++++++++---------- .../backend/contrib/uma/_template/patterns.py | 11 +---------- .../contrib/uma/_template/strategies.py | 9 --------- 4 files changed, 11 insertions(+), 30 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/python/tvm/relay/backend/contrib/uma/_template/backend.py index 6ceac88b1a84..a75efb87f454 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/python/tvm/relay/backend/contrib/uma/_template/backend.py @@ -18,7 +18,7 @@ from .passes import MyAiHwConv2dPass from ..api.utils import PassPhase from ..backend import UMABackend -from .codegen import gen_includes, gen_replace_call_extern +from .codegen import gen_includes from .patterns import conv2d_pattern diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/python/tvm/relay/backend/contrib/uma/_template/passes.py index 64c98130a3b1..c20d3b93c5ca 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/passes.py +++ b/python/tvm/relay/backend/contrib/uma/_template/passes.py @@ -17,14 +17,12 @@ """Transform passes for the my_ai_hw accelerator""" import tvm -from tvm import relay, tir +from tvm import tir from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block @tvm.tir.transform.prim_func_pass(opt_level=2) class MyAiHwConv2dPass: - _EXTERNAL_FUNCTION_NAME = "my_ai_hw_conv2dnchw" - _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext @@ -33,10 +31,11 @@ def transform_function( @staticmethod def _my_ai_hw_conv2d_pass(func, mod, ctx): - _found_blocks = [] _loops = dict() _handles = [] _entry_node = None + _external_function_name = "my_ai_hw_conv2dnchw" + _tvm_block_match_name = "conv2d_nchw" def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool: """ @@ -51,7 +50,7 @@ def _hb(op): tvm.tir.stmt_functor.post_order_visit(func.body, _hb) return name in _found_blocks - def _transform_function( + def _detect_and_replace_conv2d( func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: def _replace_conv2d(op): @@ -66,7 +65,7 @@ def _replace_conv2d(op): offsets = [_loops[i].extent.value for i in offset_order] args = buffers + offsets external_call = tvm.tir.Evaluate( - tir_call(irb, True, MyAiHwConv2dPass._EXTERNAL_FUNCTION_NAME, *args) + tir_call(irb, True, _external_function_name, *args) ) ext_calls = tvm.tir.SeqStmt([external_call]) irb.emit(ext_calls) @@ -79,8 +78,8 @@ def _replace_conv2d(op): sch = tir.Schedule(func) - if _has_block(_TVM_BLOCK_MATCH_NAME, func): - conv2d_block = sch.get_block(_TVM_BLOCK_MATCH_NAME) + if _has_block(_tvm_block_match_name, func): + conv2d_block = sch.get_block(_tvm_block_match_name) rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 loops = dict( @@ -103,7 +102,7 @@ def _replace_conv2d(op): else: return func - r = _transform_function(func, mod, ctx) + r = _detect_and_replace_conv2d(func, mod, ctx) return r @@ -124,7 +123,7 @@ def buf_from_array(ib, arr, dtype): var = ib.allocate("int32", (len(arr),), scope="global") for i, v in enumerate(arr): var[i] = v - # Declare a buffer, which is basically a view on the chunk of memory that we allocated previously + # Declare a buffer, which is basically a view on the chunk of memory that we allocated buf = 
tvm.tir.decl_buffer((len(arr),), dtype, data=var, scope="global") return buf diff --git a/python/tvm/relay/backend/contrib/uma/_template/patterns.py b/python/tvm/relay/backend/contrib/uma/_template/patterns.py index e4084c81c1e6..f9e2087dbc8c 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/patterns.py +++ b/python/tvm/relay/backend/contrib/uma/_template/patterns.py @@ -16,19 +16,10 @@ # under the License. """Relay graph patterns for the my_ai_hw accelerator""" -from tvm.relay.dataflow_pattern import is_op, wildcard, has_attr +from tvm.relay.dataflow_pattern import is_op, wildcard def conv2d_pattern(): pattern = is_op("nn.conv2d")(wildcard(), wildcard()) pattern = pattern.has_attr({"strides": [1, 1]}) return pattern - - -def dense_pattern(): - pattern = is_op("nn.dense")(wildcard(), wildcard()) - pattern = pattern.optional( - lambda x: is_op("nn.bias_add")(x, wildcard()) | is_op("add")(x, wildcard()) - ) - pattern = pattern.optional(lambda x: is_op("nn.relu")(x)) - return pattern diff --git a/python/tvm/relay/backend/contrib/uma/_template/strategies.py b/python/tvm/relay/backend/contrib/uma/_template/strategies.py index 101c62bd0a00..c7680cbaf849 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/strategies.py +++ b/python/tvm/relay/backend/contrib/uma/_template/strategies.py @@ -15,12 +15,3 @@ # specific language governing permissions and limitations # under the License. """Strategies for the my_ai_hw accelerator""" - - -from tvm import relay, te -from tvm.relay.op import op as _op -from tvm.topi.utils import get_const_tuple -from tvm.topi.nn.utils import get_pad_tuple1d -from tvm.relay.op.strategy.generic import wrap_compute_conv1d, wrap_topi_schedule - -import logging From 54160fb150530a5e7b5b83bfca91134940b8de11 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 27 Jul 2022 12:09:05 +0200 Subject: [PATCH 093/112] [UMA] moved UMA _template file to apps --- .../backend/contrib => apps}/uma/_template/__init__.py | 0 .../backend/contrib => apps}/uma/_template/backend.py | 10 +++++----- .../backend/contrib => apps}/uma/_template/codegen.py | 4 ---- .../contrib => apps}/uma/_template/conv2dnchw.cc | 0 .../backend/contrib => apps}/uma/_template/passes.py | 0 .../backend/contrib => apps}/uma/_template/patterns.py | 0 .../backend/contrib => apps}/uma/_template/run.py | 2 +- .../contrib => apps}/uma/_template/strategies.py | 0 apps/uma/uma_cli.py | 3 +-- gallery/tutorial/uma.py | 6 +++--- .../test_uma/test_uma_lowering_with_umalower.py | 6 +++--- .../contrib/test_uma/test_uma_vanilla_accelerator.py | 9 +++------ 12 files changed, 16 insertions(+), 24 deletions(-) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/__init__.py (100%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/backend.py (86%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/codegen.py (88%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/conv2dnchw.cc (100%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/passes.py (100%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/patterns.py (100%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/run.py (97%) rename {python/tvm/relay/backend/contrib => apps}/uma/_template/strategies.py (100%) diff --git a/python/tvm/relay/backend/contrib/uma/_template/__init__.py b/apps/uma/_template/__init__.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/__init__.py rename to apps/uma/_template/__init__.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/backend.py b/apps/uma/_template/backend.py similarity index 86% rename from python/tvm/relay/backend/contrib/uma/_template/backend.py rename to apps/uma/_template/backend.py index a75efb87f454..5ee7ecc19ef6 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/backend.py +++ b/apps/uma/_template/backend.py @@ -15,11 +15,11 @@ # specific language governing permissions and limitations # under the License. 
"""UMA backend for the my_ai_hw accelerator""" -from .passes import MyAiHwConv2dPass -from ..api.utils import PassPhase -from ..backend import UMABackend -from .codegen import gen_includes -from .patterns import conv2d_pattern +from passes import MyAiHwConv2dPass +from tvm.relay.backend.contrib.uma.api.utils import PassPhase +from tvm.relay.backend.contrib.uma.backend import UMABackend +from codegen import gen_includes +from patterns import conv2d_pattern class MyAiHwBackend(UMABackend): diff --git a/python/tvm/relay/backend/contrib/uma/_template/codegen.py b/apps/uma/_template/codegen.py similarity index 88% rename from python/tvm/relay/backend/contrib/uma/_template/codegen.py rename to apps/uma/_template/codegen.py index 8e33ce9b2089..5e1d6b45e81f 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/codegen.py +++ b/apps/uma/_template/codegen.py @@ -26,7 +26,3 @@ def gen_includes() -> str: includes = "" includes += f'#include "{topdir}/conv2dnchw.cc"' return includes - - -def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: - return "my_custom_api_function({}, {}, {})".format(*args) diff --git a/python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc b/apps/uma/_template/conv2dnchw.cc similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc rename to apps/uma/_template/conv2dnchw.cc diff --git a/python/tvm/relay/backend/contrib/uma/_template/passes.py b/apps/uma/_template/passes.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/passes.py rename to apps/uma/_template/passes.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/patterns.py b/apps/uma/_template/patterns.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/patterns.py rename to apps/uma/_template/patterns.py diff --git a/python/tvm/relay/backend/contrib/uma/_template/run.py b/apps/uma/_template/run.py similarity index 97% rename from python/tvm/relay/backend/contrib/uma/_template/run.py rename to apps/uma/_template/run.py index 978b393af08e..852ae1234d0f 100644 --- a/python/tvm/relay/backend/contrib/uma/_template/run.py +++ b/apps/uma/_template/run.py @@ -17,7 +17,7 @@ from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER import tvm from tvm import relay -from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend +from backend import MyAiHwBackend from tvm.relay import transform from collections import OrderedDict import numpy as np diff --git a/python/tvm/relay/backend/contrib/uma/_template/strategies.py b/apps/uma/_template/strategies.py similarity index 100% rename from python/tvm/relay/backend/contrib/uma/_template/strategies.py rename to apps/uma/_template/strategies.py diff --git a/apps/uma/uma_cli.py b/apps/uma/uma_cli.py index abb8a473cbbc..159fa9e62cb6 100644 --- a/apps/uma/uma_cli.py +++ b/apps/uma/uma_cli.py @@ -28,7 +28,6 @@ import sys import pathlib from inflection import camelize, underscore -import tvm.relay.backend.contrib.uma._template.backend as template_backend def _parse_args(): @@ -68,7 +67,7 @@ def main(): """ args = _parse_args() add_hw_name = args.add_hardware - uma_template_path = pathlib.Path(template_backend.__file__).parent.absolute() + uma_template_path = pathlib.Path(os.getcwd(), "_template").absolute() add_hw_path = os.path.join(uma_template_path.parent, add_hw_name) if os.path.exists(add_hw_path): diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index d15d8de71e2b..0708b238c87c 100644 --- a/gallery/tutorial/uma.py +++ 
b/gallery/tutorial/uma.py @@ -46,10 +46,10 @@ from tvm.relay.backend.contrib.uma.backend import UMABackend from tvm.relay.dataflow_pattern import is_op, wildcard from tvm.relay.backend.contrib.uma.api.utils import PassPhase -from tvm.relay.backend.contrib.uma._template.passes import ( +from apps.uma._template.passes import ( MyAiHwConv2dPass as VanillaAcceleratorConv2DPass, ) -from tvm.relay.backend.contrib.uma._template.codegen import gen_includes +from apps.uma._template.codegen import gen_includes ###################################################################### @@ -179,7 +179,7 @@ def conv2d_pattern(): # making use of Vanilla's C-API. # # Excerpt from vanilla_accelerator/run.py: -from tvm.relay.backend.contrib.uma._template.run import create_conv2d +from apps.uma._template.run import create_conv2d from tvm.testing.aot import AOTTestModel as AOTModel, compile_and_run diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index 6578bd6a6c69..c0b7b8d9919a 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -20,7 +20,7 @@ import tvm from tests.python.contrib.test_uma.test_uma_utils import _create_schedule, _generate_io_arrays from tvm import topi -from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass +from apps.uma._template.passes import MyAiHwConv2dPass import tvm.testing from tvm import te from tvm.relay.backend.contrib.uma.api.lower import UMALower @@ -50,8 +50,8 @@ def _conv2d_te_definition(shapes: dict) -> list: def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) - uma_path = pathlib.Path(str(tvm.relay.backend.contrib.uma.__file__)).parent.absolute() - conv2d_file = uma_path / "_template" / "conv2dnchw.cc" + apps_path = pathlib.Path(str(__file__)).parent.parent.parent.parent.parent.joinpath("apps").absolute() + conv2d_file = apps_path / "uma" / "_template" / "conv2dnchw.cc" with conv2d_file.open() as f: sch_tir = _create_schedule( diff --git a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py index c8701f7905db..e7a6b21d4ab5 100644 --- a/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py +++ b/tests/python/contrib/test_uma/test_uma_vanilla_accelerator.py @@ -17,17 +17,14 @@ """UMA testcase for the vanilla_accelerator accelerator""" import pytest -import tvm -from tvm import tir -from tvm.relay.dataflow_pattern import is_op, wildcard from tvm.relay.backend.contrib.uma.api.utils import PassPhase from tvm.relay.backend.contrib.uma.backend import UMABackend -from tvm.relay.backend.contrib.uma._template.passes import ( +from apps.uma._template.passes import ( MyAiHwConv2dPass as VanillaAcceleratorConv2dPass, ) -from tvm.relay.backend.contrib.uma._template.codegen import gen_includes +from apps.uma._template.codegen import gen_includes -from tvm.relay.backend.contrib.uma._template.patterns import conv2d_pattern +from apps.uma._template.patterns import conv2d_pattern from tvm.relay.backend.contrib.uma import uma_available pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") From f9aa26ed3dc323626f45e7a17697f6bc7140b7d5 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 27 Jul 2022 13:09:21 +0200 Subject: [PATCH 094/112] [UMA] lint --- apps/uma/_template/passes.py | 1 - .../contrib/test_uma/test_uma_lowering_with_umalower.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/uma/_template/passes.py b/apps/uma/_template/passes.py index c20d3b93c5ca..c943149f2367 100644 --- a/apps/uma/_template/passes.py +++ b/apps/uma/_template/passes.py @@ -23,7 +23,6 @@ @tvm.tir.transform.prim_func_pass(opt_level=2) class MyAiHwConv2dPass: - def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: diff --git a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py index c0b7b8d9919a..d2e0af05e3ee 100644 --- a/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py +++ b/tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py @@ -50,7 +50,9 @@ def _conv2d_te_definition(shapes: dict) -> list: def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True): placeholders = _conv2d_te_definition(shapes) - apps_path = pathlib.Path(str(__file__)).parent.parent.parent.parent.parent.joinpath("apps").absolute() + apps_path = ( + pathlib.Path(str(__file__)).parent.parent.parent.parent.parent.joinpath("apps").absolute() + ) conv2d_file = apps_path / "uma" / "_template" / "conv2dnchw.cc" with conv2d_file.open() as f: From 960fef6974614f7aa709d7517610e9b7340887a7 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 27 Jul 2022 18:18:29 +0200 Subject: [PATCH 095/112] [UMA] Remove exceptions when dispatching over targets --- src/relay/backend/contrib/uma/targets.cc | 25 ++++++++---------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index d8a2d40f4c7d..569544d16953 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -63,24 +63,15 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") .set_attr("TIRToRuntime", relay::contrib::uma::TIRToRuntime); for (auto& attr_option : attr_options) { - try { - target_kind.add_attr_option(attr_option.first, - Downcast(attr_option.second)); - continue; - } catch (...) { - } - try { - target_kind.add_attr_option(attr_option.first, Downcast(attr_option.second)); - continue; - } catch (...) { - } - try { - target_kind.add_attr_option(attr_option.first, - Downcast(attr_option.second)); - continue; - } catch (...) { + auto option_name = attr_option.first; + auto default_value = attr_option.second; + if (default_value->IsInstance()) { + target_kind.add_attr_option(option_name, Downcast(default_value)); + }else if(default_value->IsInstance()) { + target_kind.add_attr_option(option_name, Downcast(default_value)); + } else { LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() - << " can not be added. Only String, Integer, or Bool are supported."; + << " can not be added. Only String, Integer, or Bool are supported."; } } return true; From 734d2658365253b627cc484ed27ee7460fcf69ac Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 27 Jul 2022 13:18:39 +0200 Subject: [PATCH 096/112] [UMA] vanilla pattern update --- apps/uma/_template/patterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/uma/_template/patterns.py b/apps/uma/_template/patterns.py index f9e2087dbc8c..ce25fe4dff8e 100644 --- a/apps/uma/_template/patterns.py +++ b/apps/uma/_template/patterns.py @@ -21,5 +21,5 @@ def conv2d_pattern(): pattern = is_op("nn.conv2d")(wildcard(), wildcard()) - pattern = pattern.has_attr({"strides": [1, 1]}) + pattern = pattern.has_attr({"strides": [1, 1], "groups": 1}) return pattern From 6f100b985ea531f9dff8702d3ea0e31bf1aec17a Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 27 Jul 2022 11:27:05 +0000 Subject: [PATCH 097/112] [UMA] added mobilenet integration test --- .../contrib/test_uma/test_uma_pipeline.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py index 0b02982744ae..49b4a196bbd4 100644 --- a/tests/python/contrib/test_uma/test_uma_pipeline.py +++ b/tests/python/contrib/test_uma/test_uma_pipeline.py @@ -17,7 +17,7 @@ import pytest from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER -from tvm.relay import transform +from tvm.relay import transform, testing from tvm.testing.aot import ( AOTTestModel, AOTTestRunner, @@ -31,7 +31,7 @@ import numpy as np from collections import OrderedDict -from tvm.relay.backend.contrib.uma import uma_available +from tvm.relay.backend.contrib.uma.api.utils import uma_available pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") @@ -101,5 +101,36 @@ def _generate_runtime_data(input_shapes: dict, output_shapes: dict) -> [OrderedD return inputs, outputs +def test_mobilenet(): + """Full network test with Mobilenet""" + use_unpacked_api = True + interface_api = "c" + test_runner = AOT_DEFAULT_RUNNER + + mod, params = testing.mobilenet.get_workload(batch_size=1) + + uma_backend = VanillaAcceleratorBackend() + uma_backend.register() + target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) + target_c = tvm.target.Target("c") + + data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] + data = np.random.uniform(size=data_shape).astype("float32") + input_list = {"data": data} + output_list = generate_ref_data(mod, input_list, params) + mod = uma_backend.partition(mod) + aot_test_model = AOTTestModel(module=mod, inputs=input_list, outputs=output_list, params=params) + + compile_and_run( + aot_test_model, + test_runner, + interface_api, + use_unpacked_api, + workspace_byte_alignment=1, + debug_calculated_workspaces=False, + target=[target_c, target], + ) + + if __name__ == "__main__": tvm.testing.main() From e9414211df721112342ec5485bdd88011d8a50e7 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Wed, 27 Jul 2022 14:37:35 +0200 Subject: [PATCH 098/112] [UMA] clang lint --- apps/uma/_template/conv2dnchw.cc | 34 ++++++++++++------------ src/relay/backend/contrib/uma/targets.cc | 14 +++++----- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/apps/uma/_template/conv2dnchw.cc b/apps/uma/_template/conv2dnchw.cc index 047bde107112..2ee081b3c3ca 100644 --- a/apps/uma/_template/conv2dnchw.cc +++ b/apps/uma/_template/conv2dnchw.cc @@ -22,21 +22,19 @@ extern "C" #endif - /*! - * \brief Conv2D function for mock-accelerator examples. Limited to same-padded Conv2D with stride (1,1) and datatype float. 
- * \param ifmap Pointer to input feature map data of size iw*ih*ic*sizeof(float). - * \param weights Pointer to weight data of size kh*kw*ic**oc*sizeof(float). - * \param result Pointer to output feature map data of size iw*ih*oc*sizeof(float). - * \param oc Number of channels of output feature map. - * \param iw Width of input feature map, ifmap. - * \param ih Height of input feature map, ifmap. - * \param ic Number of channels of input feature map. - * \param kh Height of convolution kernels. - * \param kw Width of convolution kernels. - * - * \return error code - * - */ + /*! + * \brief Conv2D function for mock-accelerator examples. Limited to same-padded Conv2D with + * stride (1,1) and datatype float. \param ifmap Pointer to input feature map data of size + * iw*ih*ic*sizeof(float). \param weights Pointer to weight data of size + * kh*kw*ic**oc*sizeof(float). \param result Pointer to output feature map data of size + * iw*ih*oc*sizeof(float). \param oc Number of channels of output feature map. \param iw Width + * of input feature map, ifmap. \param ih Height of input feature map, ifmap. \param ic Number + * of channels of input feature map. \param kh Height of convolution kernels. \param kw Width of + * convolution kernels. + * + * \return error code + * + */ int my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, int kh, int kw) { @@ -49,7 +47,8 @@ extern "C" int padded_iw = iw + 2 * kw_low; int padded_ih = ih + 2 * kh_low; - // This is only example code. A real hardware accelerator would call a device specific malloc function. + // This is only example code. A real hardware accelerator would call a device specific malloc + // function. float* pad_temp = (float*)malloc( (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); @@ -89,7 +88,8 @@ extern "C" } } - // This is only example code. A real hardware accelerator would call a device specific free function. + // This is only example code. A real hardware accelerator would call a device specific free + // function. 
free(pad_temp); return 0; } diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 569544d16953..64e75fd39c61 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -38,14 +38,14 @@ runtime::Module TIRToRuntime(IRModule mod, Target target); } // namespace relay TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") - .set_body_typed([](String target_name, Map attr_options) -> bool{ - //@todo(cgerum): We probably should get rid of target.register rather sooner than later - // And use a proper registry for uma backends - for(const String registered_target_name : ::tvm::TargetKindRegEntry::ListTargetKinds()){ - if(registered_target_name == target_name){ - return false; - } + .set_body_typed([](String target_name, Map attr_options) -> bool { + // @todo(cgerum): We probably should get rid of target.register rather sooner than later + // And use a proper registry for uma backends + for (const String registered_target_name : ::tvm::TargetKindRegEntry::ListTargetKinds()) { + if (registered_target_name == target_name) { + return false; } + } auto target_kind = ::tvm::TargetKindRegEntry::RegisterOrGet(target_name) From 011d2f2c87972c841902d5ceb8d8d79ee92db15c Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Wed, 27 Jul 2022 18:27:12 +0200 Subject: [PATCH 099/112] Remove tir to runtime --- .../relay/backend/contrib/uma/api/codegen.py | 11 +-------- .../backend/contrib/uma/tir_to_runtime.cc | 24 ------------------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index c044ef23cbda..5e2782e144b5 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -36,8 +36,7 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: def _register_c_codegen( self, - includes: Optional[Callable[[], str]] = None, - replace_call_extern: Optional[Callable[[tvm.ir.container.Array], str]] = None, + includes: Optional[Callable[[], str]] = None ) -> None: """Registration of UMA helper functions, e.g. includes and replace_call_extern. @@ -45,8 +44,6 @@ def _register_c_codegen( ---------- includes : OptionalCallable[[], str]] user-defined function that adds C-#include statement to UMA C-Code. - replace_call_extern: OptionalCallable[[tvm.ir.container.Array], str]] - user-definde function that defines how to replace extern call in UMA C-Code. """ if includes is not None: tvm._ffi.register_func( @@ -54,12 +51,6 @@ def _register_c_codegen( includes, override=True, ) - if replace_call_extern is not None: - tvm._ffi.register_func( - f"relay.ext.uma.codegen_c_replace_call_extern_{self.target_name}", - replace_call_extern, - override=True, - ) def register(self) -> None: pass diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index 474956ae6763..77d90fd2b27f 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -58,30 +58,6 @@ class UMACodegen : public codegen::CodeGenCHost { private: String target_str_; - using codegen::CodeGenCHost::VisitStmt_; - - /*! 
* \brief Emits target specific APIs for every call_extern */ - void VisitExpr_(const CallNode* op, std::ostream& os) final { - if (!op->op.same_as(builtin::call_extern())) { - CodeGenCHost::VisitExpr_(op, os); - return; - } - auto replace_call_extern_pf = - tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_replace_call_extern_" + target_str_); - if (replace_call_extern_pf == nullptr) { - CodeGenCHost::VisitExpr_(op, os); - } else { - // - funtion type (void) still gets printed before CallNode if extern call is wrapped in - // EvaluateNode - // - VarNode arguments might have "wrong" name_hints. The correct variable name is determined - // in C++ through GetVarID - String api_string = (*replace_call_extern_pf)(op->args); - os << api_string; - } - return; - } -}; - runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; bool emit_asserts = false; From 4875ed282969e3a5e05311edadfaced244672864 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Thu, 28 Jul 2022 10:33:59 +0200 Subject: [PATCH 100/112] [UMA] Use sequential for UMA relay passes --- .../backend/contrib/uma/api/partitioner.py | 38 ++++++++++--------- .../tvm/relay/backend/contrib/uma/backend.py | 10 ++--- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 23c3baa5335c..30cbc3ebb2e9 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -93,26 +93,30 @@ def partition( if params: mod["main"] = bind_params_by_name(mod["main"], params) - mod = relay.transform.InferType()(mod) - mod = tvm.transform.Sequential( - [p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING] - )(mod) - mod = relay.transform.MergeComposite(self._pattern_table())(mod) - mod = relay.transform.AnnotateTarget(self.target_name)(mod) + pass_sequence = [] + pass_sequence.append(relay.transform.InferType()) + pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING]) + pass_sequence.append(relay.transform.MergeComposite(self._pattern_table())) + pass_sequence.append(relay.transform.AnnotateTarget(self.target_name)) if self.merge_compiler_regions: - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.PartitionGraph()(mod) - mod = relay.transform.InferType()(mod) - mod = tvm.transform.Sequential( - [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0] - )(mod) - mod = relay.transform.InferType()(mod) + pass_sequence.append(relay.transform.MergeCompilerRegions()) + pass_sequence.append(relay.transform.InferType()) + pass_sequence.append(relay.transform.PartitionGraph()) + pass_sequence.append(relay.transform.InferType()) + pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0]) + + pass_sequence.append(relay.transform.InferType()) + sequential_passes = tvm.transform.Sequential(pass_sequence) + mod = sequential_passes(mod) + + # Defunctionalize the partitioned functions to allow lowering for gvar, func in mod.functions.items(): mod.update_func(gvar, relay.transform.Defunctionalization(func, mod)) - mod = tvm.transform.Sequential( - [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1] - )(mod) + + post_partition_passes_1 = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1]) + mod = 
post_partition_passes_1(mod) + + return mod diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 156f4cfd7251..8dfbf1655f59 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -252,14 +252,13 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: .. code-block:: python self._register_codegen( - fmt="c", includes=gen_includes, replace_call_extern=gen_replace_call_extern + fmt="c", includes=gen_includes ) - The C-codegen provides two hooks which allows the user to insert code through + The C-codegen currently provides one hook which allows the user to insert code through the python API. - `includes` hooks into the include stream and allows insertion of custom includes. - - `replace_call_extern` hooks into the expression visitor and allows the user to insert - custom code for a given extern call. + The code generation functions can look like this: @@ -268,9 +267,6 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: def gen_includes() -> str: includes = "#include \n" return includes - - def gen_replace_call_extern(args: tvm.ir.container.Array) -> str: - return "my_custom_api_function({}, {}, {})".format(*args) """ self._tir_to_runtime._register_codegen(fmt, **kwargs) From ccb4f4cb642b0ddfb24f497e1718496257d6fe4d Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Thu, 28 Jul 2022 15:49:03 +0200 Subject: [PATCH 101/112] Use comparison against BYOC flow in test_partition --- .../backend/contrib/uma/api/partitioner.py | 6 +-- .../python/contrib/test_uma/test_partition.py | 48 ++++++++++++++----- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 30cbc3ebb2e9..bfb149610c52 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -75,7 +75,7 @@ def register(self) -> None: def partition( self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None ) -> tvm.IRModule: - """Partition the relay graph in by the NPU supported and unsupported parts. + """Partition the relay graph in parts supported and unsupported by the target hardware accelerator. 
Parameters ---------- @@ -94,18 +94,14 @@ def partition( mod["main"] = bind_params_by_name(mod["main"], params) pass_sequence = [] - pass_sequence.append(relay.transform.InferType()) pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING]) pass_sequence.append(relay.transform.MergeComposite(self._pattern_table())) pass_sequence.append(relay.transform.AnnotateTarget(self.target_name)) if self.merge_compiler_regions: pass_sequence.append(relay.transform.MergeCompilerRegions()) - pass_sequence.append(relay.transform.InferType()) pass_sequence.append(relay.transform.PartitionGraph()) - pass_sequence.append(relay.transform.InferType()) pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0]) - pass_sequence.append(relay.transform.InferType()) sequential_passes = tvm.transform.Sequential(pass_sequence) mod = sequential_passes(mod) diff --git a/tests/python/contrib/test_uma/test_partition.py b/tests/python/contrib/test_uma/test_partition.py index 217f23083caf..baaf52a40e7e 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -18,12 +18,14 @@ import pytest import tvm +import tvm.relay as relay from tvm.relay.backend.contrib.uma.api import UMAPartitioner from tvm.relay.op.contrib.register import get_pattern_table from tvm.relay.testing import resnet, mlp from tvm.relay.backend.contrib.uma import uma_available + pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") @@ -37,19 +39,21 @@ def test_partition_table(): @pytest.mark.parametrize( - "workload,backend,merge,expected_partitions", + "workload,backend,merge", [ - ("resnet", "dnnl", False, 17), - ("resnet", "dnnl", True, 17), - ("mlp", "dnnl", False, 1), - ("resnet", "cutlass", False, 2), - ("resnet", "cutlass", True, 2), - ("mlp", "cutlass", False, 4), - ("mlp", "cutlass", True, 2), + ("resnet", "dnnl", False), + ("resnet", "dnnl", True), + ("mlp", "dnnl", False), + ("mlp", "dnnl", True), + ("resnet", "cutlass", False), + ("resnet", "cutlass", True), + ("mlp", "cutlass", False), + ("mlp", "cutlass", True), ], ) -def test_existing_pattern_tables(workload, backend, merge, expected_partitions): - partitioner = UMAPartitioner(backend + "_uma", merge) +def test_existing_pattern_tables(workload, backend, merge): + """Tests that uma partitioner creates the same partitions than default BYOC partitioning""" + partitioner = UMAPartitioner(backend, merge) pattern_table = get_pattern_table(backend) for entry in pattern_table: @@ -67,9 +71,29 @@ def test_existing_pattern_tables(workload, backend, merge, expected_partitions): partitioner.register() partitioned_mod = partitioner.partition(mod) - print(partitioned_mod) - assert len(partitioned_mod.functions) == expected_partitions + + + def partition_default(mod): + """partitions using default BYOC flow""" + + sequence = [ + relay.transform.MergeComposite(pattern_table), + relay.transform.AnnotateTarget(backend), + ] + + if merge: + sequence.append(relay.transform.MergeCompilerRegions()) + + + sequence.append(relay.transform.PartitionGraph()) + sequential = tvm.transform.Sequential(sequence) + + return sequential(mod) + + default_partitioned_mod = partition_default(mod) + + assert len(partitioned_mod.functions) == len(default_partitioned_mod.functions) if __name__ == "__main__": From 1797ae4c89e913ea9f1cbad9f2d500e1c19fa6b7 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Wed, 27 Jul 2022 18:32:43 +0200 Subject: [PATCH 102/112] [UMA] tutorial update: moved code blocks to RST --- apps/uma/_template/passes.py | 15 ++-- gallery/tutorial/uma.py | 134 +++++++++++++++++------------------ 2 files changed, 75 insertions(+), 74 deletions(-) diff --git a/apps/uma/_template/passes.py b/apps/uma/_template/passes.py index c943149f2367..c4ba0e5c168b 100644 --- a/apps/uma/_template/passes.py +++ b/apps/uma/_template/passes.py @@ -23,18 +23,19 @@ @tvm.tir.transform.prim_func_pass(opt_level=2) class MyAiHwConv2dPass: + _EXTERNAL_FUNCTION_NAME = "my_ai_hw_conv2dnchw" + _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" + def transform_function( self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext ) -> tvm.tir.PrimFunc: return self._my_ai_hw_conv2d_pass(func, mod, ctx) - @staticmethod - def _my_ai_hw_conv2d_pass(func, mod, ctx): + @classmethod + def _my_ai_hw_conv2d_pass(cls, func, mod, ctx): _loops = dict() _handles = [] _entry_node = None - _external_function_name = "my_ai_hw_conv2dnchw" - _tvm_block_match_name = "conv2d_nchw" def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool: """ @@ -64,7 +65,7 @@ def _replace_conv2d(op): offsets = [_loops[i].extent.value for i in offset_order] args = buffers + offsets external_call = tvm.tir.Evaluate( - tir_call(irb, True, _external_function_name, *args) + tir_call(irb, True, cls._EXTERNAL_FUNCTION_NAME, *args) ) ext_calls = tvm.tir.SeqStmt([external_call]) irb.emit(ext_calls) @@ -77,8 +78,8 @@ def _replace_conv2d(op): sch = tir.Schedule(func) - if _has_block(_tvm_block_match_name, func): - conv2d_block = sch.get_block(_tvm_block_match_name) + if _has_block(cls._TVM_BLOCK_MATCH_NAME, func): + conv2d_block = sch.get_block(cls._TVM_BLOCK_MATCH_NAME) rv_loops = sch.get_loops(conv2d_block) assert len(rv_loops) == 7 loops = dict( diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index 0708b238c87c..b3b7f047e82e 100644 --- a/gallery/tutorial/uma.py +++ b/gallery/tutorial/uma.py @@ -21,20 +21,24 @@ =================================================== **Author**: `Michael J. Klaiber `_ -This is an introductory tutorial to the **Universal Modular Accelerator Interface** (UMA). -UMA provides an easy-to-use API to integrate new hardware accelerators into TVM. - -This tutorial gives you step-by-step guidance how to use UMA to -make your hardware accelerator TVM-ready. -While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only -API to integrate a number of hardware accelerator classes into TVM. +""" -In this tutorial you will get to know the UMA API in three use cases of increasing complexity. -In these use case the three mock-accelerators -**Vanilla**, **Strawberry** and **Chocolate** are introduced and -integrated into TVM using UMA. -""" +###################################################################### +# This is an introductory tutorial to the **Universal Modular Accelerator Interface** (UMA). +# UMA provides an easy-to-use API to integrate new hardware accelerators into TVM. +# +# This tutorial gives you step-by-step guidance how to use UMA to +# make your hardware accelerator TVM-ready. +# While there is no one-fits-all solution for this problem, UMA targets to provide a stable and Python-only +# API to integrate a number of hardware accelerator classes into TVM. +# +# +# In this tutorial you will get to know the UMA API in three use cases of increasing complexity. 
+# In these use case the three mock-accelerators +# **Vanilla**, **Strawberry** and **Chocolate** are introduced and +# integrated into TVM using UMA. +# # sphinx_gallery_start_ignore from tvm import testing @@ -42,15 +46,6 @@ testing.utils.install_request_hook(depth=3) # sphinx_gallery_end_ignore -import tvm -from tvm.relay.backend.contrib.uma.backend import UMABackend -from tvm.relay.dataflow_pattern import is_op, wildcard -from tvm.relay.backend.contrib.uma.api.utils import PassPhase -from apps.uma._template.passes import ( - MyAiHwConv2dPass as VanillaAcceleratorConv2DPass, -) -from apps.uma._template.codegen import gen_includes - ###################################################################### # Vanilla @@ -107,33 +102,40 @@ # # The generated backend for vanilla is found in `vanilla_accelerator/backend.py`: - -class VanillaAcceleratorBackend(UMABackend): - """UMA backend for VanillaAccelerator.""" - - def __init__(self): - super().__init__() - - self._register_pattern("conv2d", conv2d_pattern()) - self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2DPass()) - self._register_codegen(fmt="c", includes=gen_includes) - - @property - def target_name(self): - return "vanilla_accelerator" +###################################################################### +# +# .. code-block:: python +# +# class VanillaAcceleratorBackend(UMABackend): +# """UMA backend for VanillaAccelerator.""" +# +# def __init__(self): +# super().__init__() +# +# self._register_pattern("conv2d", conv2d_pattern()) +# self._register_tir_pass(PassPhase.TIR_PHASE_0, VanillaAcceleratorConv2DPass()) +# self._register_codegen(fmt="c", includes=gen_includes) +# +# @property +# def target_name(self): +# return "vanilla_accelerator" ################################################################################ # Define offloaded patterns # -# To specify that `Conv2D` is offloaded to **Vanilla**, it is described as Relay dataflow pattern (`DFPattern `_) in -# `vanilla_accelerator/patterns.py` +# To specify that `Conv2D` is offloaded to **Vanilla**, it is described as Relay dataflow pattern +# (`DFPattern `_) in `vanilla_accelerator/patterns.py` -def conv2d_pattern(): - pattern = is_op("nn.conv2d")(wildcard(), wildcard()) - pattern = pattern.has_attr({"strides": [1, 1]}) - return pattern +################################################################################ +# +# .. code-block:: python +# +# def conv2d_pattern(): +# pattern = is_op("nn.conv2d")(wildcard(), wildcard()) +# pattern = pattern.has_attr({"strides": [1, 1]}) +# return pattern ################################################################################ @@ -178,32 +180,30 @@ def conv2d_pattern(): # The file ``vanilla_accelerator/run.py`` provides a demo running a Conv2D layer # making use of Vanilla's C-API. 
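The `create_conv2d` helper used by the demo is not reproduced in the tutorial text. A rough sketch of what such a helper could look like, with illustrative shapes and names rather than the actual implementation in `_template/run.py`:

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
    from tvm.testing.aot import generate_ref_data


    def create_conv2d(ifmap_shape=(1, 3, 224, 224), weight_shape=(4, 3, 3, 3), dtype="float32"):
        # Build a single stride-1, same-padded Conv2D module so it matches the
        # pattern registered by the backend.
        data = relay.var("data", shape=ifmap_shape, dtype=dtype)
        weight = relay.const(np.random.uniform(size=weight_shape).astype(dtype))
        out = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1))
        mod = tvm.IRModule.from_expr(relay.Function([data], out))
        mod = relay.transform.InferType()(mod)

        # Reference inputs and outputs for the AOT test harness.
        inputs = {"data": np.random.uniform(size=ifmap_shape).astype(dtype)}
        output_list = generate_ref_data(mod, inputs)
        return mod, inputs, output_list, AOT_DEFAULT_RUNNER
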
# -# Excerpt from vanilla_accelerator/run.py: -from apps.uma._template.run import create_conv2d -from tvm.testing.aot import AOTTestModel as AOTModel, compile_and_run - - -def main(): - mod, inputs, output_list, runner = create_conv2d() - - uma_backend = VanillaAcceleratorBackend() - uma_backend.register() - mod = uma_backend.partition(mod) - target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) - - export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path - print(f"Generated files are in {export_directory}") - compile_and_run( - AOTModel(module=mod, inputs=inputs, outputs=output_list), - runner, - interface_api="c", - use_unpacked_api=True, - target=target, - test_dir=str(export_directory), - ) - - -main() +# +# .. code-block:: python +# +# def main(): +# mod, inputs, output_list, runner = create_conv2d() +# +# uma_backend = VanillaAcceleratorBackend() +# uma_backend.register() +# mod = uma_backend.partition(mod) +# target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) +# +# export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path +# print(f"Generated files are in {export_directory}") +# compile_and_run( +# AOTModel(module=mod, inputs=inputs, outputs=output_list), +# runner, +# interface_api="c", +# use_unpacked_api=True, +# target=target, +# test_dir=str(export_directory), +# ) +# +# +# main() ############################################################ # By running ``vanilla_accelerator/run.py`` the output files are generated in the model library format (MLF). From b1f02e1a10695711302ec1d93abc61522e4a26f9 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Thu, 28 Jul 2022 14:37:52 +0200 Subject: [PATCH 103/112] [UMA] tutorial update and lint fixes --- gallery/tutorial/uma.py | 3 ++- python/tvm/relay/backend/contrib/uma/api/codegen.py | 5 +---- python/tvm/relay/backend/contrib/uma/backend.py | 2 +- src/relay/backend/contrib/uma/targets.cc | 4 ++-- src/relay/backend/contrib/uma/tir_to_runtime.cc | 3 ++- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/gallery/tutorial/uma.py b/gallery/tutorial/uma.py index b3b7f047e82e..ed4fc4cf805c 100644 --- a/gallery/tutorial/uma.py +++ b/gallery/tutorial/uma.py @@ -19,7 +19,8 @@ Making your Hardware Accelerator TVM-ready with UMA =================================================== -**Author**: `Michael J. Klaiber `_ +**Authors**: `Michael J. Klaiber `_, `Christoph Gerum `_, +`Paul Palomero Bernardo `_ """ diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index 5e2782e144b5..8ba17d30de8d 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -34,10 +34,7 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: else: raise RuntimeError(f'Unsupported codegen format "{fmt}"') - def _register_c_codegen( - self, - includes: Optional[Callable[[], str]] = None - ) -> None: + def _register_c_codegen(self, includes: Optional[Callable[[], str]] = None) -> None: """Registration of UMA helper functions, e.g. includes and replace_call_extern. 
Parameters diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 8dfbf1655f59..2586a41b3961 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -258,7 +258,7 @@ def _register_codegen(self, fmt: str = "c", **kwargs) -> None: The C-codegen currently provides one hook which allows the user to insert code through the python API. - `includes` hooks into the include stream and allows insertion of custom includes. - + The code generation functions can look like this: diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index 64e75fd39c61..e17feb82d2d5 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -67,11 +67,11 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") auto default_value = attr_option.second; if (default_value->IsInstance()) { target_kind.add_attr_option(option_name, Downcast(default_value)); - }else if(default_value->IsInstance()) { + } else if (default_value->IsInstance()) { target_kind.add_attr_option(option_name, Downcast(default_value)); } else { LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() - << " can not be added. Only String, Integer, or Bool are supported."; + << " can not be added. Only String, Integer, or Bool are supported."; } } return true; diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index 77d90fd2b27f..e575e08464fd 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -57,6 +57,7 @@ class UMACodegen : public codegen::CodeGenCHost { private: String target_str_; +}; runtime::Module TIRToRuntime(IRModule mod, Target target) { bool output_ssa = false; @@ -75,6 +76,6 @@ runtime::Module TIRToRuntime(IRModule mod, Target target) { } } // namespace uma -} // namespace contrib +}; // namespace contrib } // namespace relay } // namespace tvm From 0cbe6252b95462d662845618d7c0a309574ca72d Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Mon, 1 Aug 2022 10:56:37 +0200 Subject: [PATCH 104/112] [UMA] removing UMA from i386 build, as there is a fail in the CI pipeline due to missing CLANG for i386 --- src/relay/backend/contrib/uma/relay_to_tir.cc | 2 +- tests/scripts/task_config_build_i386.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index 5bb64663efe8..a2377c726d99 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -154,7 +154,7 @@ TVM_REGISTER_GLOBAL("relay.ext.uma.OutlineCompilerFunctions") .set_body_typed(OutlineCompilerFunctions); /*! - * \brief This pass will lower NPU functions in a Relay module to scheduled TIR prim functions. + * \brief This pass will lower UMA functions in a Relay module to scheduled TIR prim functions. 
*/ tvm::transform::Pass RelayToTIR(String target_name) { runtime::TypedPackedFunc pass_func = diff --git a/tests/scripts/task_config_build_i386.sh b/tests/scripts/task_config_build_i386.sh index 20f6c4bf0749..a570e9801ad3 100755 --- a/tests/scripts/task_config_build_i386.sh +++ b/tests/scripts/task_config_build_i386.sh @@ -34,6 +34,6 @@ echo set\(USE_VTA_FSIM ON\) >> config.cmake echo set\(USE_VTA_TSIM ON\) >> config.cmake echo set\(USE_VERILATOR ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake -echo set\(USE_UMA ON\) >> config.cmake +echo set\(USE_UMA OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake From af16ae28928e3f820c737a498cc65ebe1279aed0 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 2 Aug 2022 10:20:15 +0000 Subject: [PATCH 105/112] [BYOC-DNNL] covered case for sum node without attr --- python/tvm/relay/op/contrib/dnnl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index b8c49176ac8f..f76d4bd10daf 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -249,6 +249,8 @@ def predicate(expr): for e, op_name in zip([expr, expr.args[0]], ["sum", "bias_add"]): args = get_args(e) attrs = get_attrs(e.args[0]) + if attrs is None: + return False if not checker(attrs, args, op_name): return False return True From ab670d1f4f760f69454ee3ef94472c32b461ff00 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 2 Aug 2022 15:57:24 +0200 Subject: [PATCH 106/112] [UMA] pylint --- .../backend/contrib/uma/api/partitioner.py | 19 +++++++++++-------- .../python/contrib/test_uma/test_partition.py | 3 --- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index bfb149610c52..54dc08cfcf7a 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -94,25 +94,28 @@ def partition( mod["main"] = bind_params_by_name(mod["main"], params) pass_sequence = [] - pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING]) + pass_sequence.extend( + [p[1] for p in self._relay_passes if p[0] == PassPhase.PRE_PARTITIONING] + ) pass_sequence.append(relay.transform.MergeComposite(self._pattern_table())) pass_sequence.append(relay.transform.AnnotateTarget(self.target_name)) if self.merge_compiler_regions: pass_sequence.append(relay.transform.MergeCompilerRegions()) pass_sequence.append(relay.transform.PartitionGraph()) - pass_sequence.extend([p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0]) - + pass_sequence.extend( + [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_0] + ) + sequential_passes = tvm.transform.Sequential(pass_sequence) mod = sequential_passes(mod) - # Defunctionalize the partitioned functions to allow lowering for gvar, func in mod.functions.items(): mod.update_func(gvar, relay.transform.Defunctionalization(func, mod)) - - post_partition_passes_1 = tvm.transform.Sequential([p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1]) - mod = post_partition_passes_1(mod) - + post_partition_passes_1 = tvm.transform.Sequential( + [p[1] for p in self._relay_passes if p[0] == PassPhase.POST_PARTITIONING_1] + ) + mod = post_partition_passes_1(mod) return mod diff --git a/tests/python/contrib/test_uma/test_partition.py 
b/tests/python/contrib/test_uma/test_partition.py index baaf52a40e7e..ec2107f881bc 100644 --- a/tests/python/contrib/test_uma/test_partition.py +++ b/tests/python/contrib/test_uma/test_partition.py @@ -72,8 +72,6 @@ def test_existing_pattern_tables(workload, backend, merge): partitioner.register() partitioned_mod = partitioner.partition(mod) - - def partition_default(mod): """partitions using default BYOC flow""" @@ -85,7 +83,6 @@ def partition_default(mod): if merge: sequence.append(relay.transform.MergeCompilerRegions()) - sequence.append(relay.transform.PartitionGraph()) sequential = tvm.transform.Sequential(sequence) From ad616559cc31b63da23286e8acf3f222abb5d0c3 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 2 Aug 2022 14:13:43 +0000 Subject: [PATCH 107/112] [UMA] pylint --- python/tvm/relay/backend/contrib/uma/api/partitioner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/partitioner.py b/python/tvm/relay/backend/contrib/uma/api/partitioner.py index 54dc08cfcf7a..48cac81d13d8 100644 --- a/python/tvm/relay/backend/contrib/uma/api/partitioner.py +++ b/python/tvm/relay/backend/contrib/uma/api/partitioner.py @@ -75,7 +75,8 @@ def register(self) -> None: def partition( self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None ) -> tvm.IRModule: - """Partition the relay graph in parts supported and unsupported by the target hardware accelerator. + """Partition the relay graph in parts supported and unsupported by the + target hardware accelerator. Parameters ---------- From 34a3b694eb67a932e8757e79de1612ea78995a25 Mon Sep 17 00:00:00 2001 From: "Michael J. Klaiber" Date: Tue, 2 Aug 2022 23:52:10 +0200 Subject: [PATCH 108/112] [UMA] aot fix --- python/tvm/testing/aot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py index 46fdae40a88c..5d7fb62cd204 100644 --- a/python/tvm/testing/aot.py +++ b/python/tvm/testing/aot.py @@ -869,13 +869,13 @@ def compile_and_run( Prints commands to build and run AOT test runner """ - if isinstance(target, str): - target = tvm.target.Target(target) - if target_opts: for key, val in target_opts.items(): target += f" {key}={val}" + if isinstance(target, str): + target = tvm.target.Target(target) + compiled_test_mods = compile_models( models=models, interface_api=interface_api, From ac2379eb36a4ac8c08021fab02f46b7ff540c7fd Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 5 Aug 2022 21:17:31 +0200 Subject: [PATCH 109/112] [UMA] Changes PR review --- apps/uma/_template/__init__.py | 1 - apps/uma/_template/conv2dnchw.cc | 1 + apps/uma/_template/passes.py | 12 +++------ apps/uma/_template/strategies.py | 16 ++++++++++++ include/tvm/relay/transform.h | 1 - .../relay/backend/contrib/uma/api/codegen.py | 15 +++++++++-- .../relay/backend/contrib/uma/api/lower.py | 23 +++++++++-------- .../tvm/relay/backend/contrib/uma/backend.py | 8 ++++++ src/relay/backend/contrib/uma/relay_to_tir.cc | 1 + src/relay/backend/contrib/uma/targets.cc | 4 +-- .../backend/contrib/uma/tir_to_runtime.cc | 7 +++--- tests/python/contrib/test_uma/test_target.py | 25 +++++++++++++++++++ 12 files changed, 85 insertions(+), 29 deletions(-) diff --git a/apps/uma/_template/__init__.py b/apps/uma/_template/__init__.py index 500eb327796e..2cc0ee880d76 100644 --- a/apps/uma/_template/__init__.py +++ b/apps/uma/_template/__init__.py @@ -18,6 +18,5 @@ Template files for UMA tutorial -Do not import """ diff --git a/apps/uma/_template/conv2dnchw.cc b/apps/uma/_template/conv2dnchw.cc index 2ee081b3c3ca..03b401ec29ea 100644 --- a/apps/uma/_template/conv2dnchw.cc +++ b/apps/uma/_template/conv2dnchw.cc @@ -18,6 +18,7 @@ */ #include +//TODO(mjklaiber): leverage pragma import_c in the future #ifdef __cplusplus extern "C" #endif diff --git a/apps/uma/_template/passes.py b/apps/uma/_template/passes.py index c4ba0e5c168b..b4f261a5ab49 100644 --- a/apps/uma/_template/passes.py +++ b/apps/uma/_template/passes.py @@ -64,11 +64,7 @@ def _replace_conv2d(op): offset_order = ["co", "w", "h", "ci", "kh", "kw"] offsets = [_loops[i].extent.value for i in offset_order] args = buffers + offsets - external_call = tvm.tir.Evaluate( - tir_call(irb, True, cls._EXTERNAL_FUNCTION_NAME, *args) - ) - ext_calls = tvm.tir.SeqStmt([external_call]) - irb.emit(ext_calls) + irb.emit(tir_call(irb, True, cls._EXTERNAL_FUNCTION_NAME, *args)) irb_result = irb.get() return irb_result elif isinstance(op, tvm.tir.SeqStmt): @@ -129,7 +125,7 @@ def buf_from_array(ib, arr, dtype): if extern: args = [i.data if isinstance(i, tvm.tir.Buffer) else i for i in args] - call = tvm.tir.call_extern("int32", name, *args) + return tvm.tir.call_extern("int32", name, *args) else: args = [ buf_from_array(ib, i, "int32") @@ -137,6 +133,4 @@ def buf_from_array(ib, arr, dtype): else i for i in args ] - call = tvm.tir.call_packed(name, *args) - - return call + return tvm.tir.call_packed(name, *args) diff --git a/apps/uma/_template/strategies.py b/apps/uma/_template/strategies.py index c7680cbaf849..aa1ea07280e4 100644 --- a/apps/uma/_template/strategies.py +++ b/apps/uma/_template/strategies.py @@ -15,3 +15,19 @@ # specific language governing permissions and limitations # under the License. 
"""Strategies for the my_ai_hw accelerator""" + +# Example how to integrate a custom conv1d strategy: + +# @relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") +# def custom_conv1d_strategy(attrs, inputs, out_type, target): +# strategy = _op.OpStrategy() +# strategy.add_implementation( +# wrap_compute_conv1d(custom_conv1d_compute), +# wrap_topi_schedule(custom_conv1d_schedule), +# name="custom_conv1d.generic", +# return strategy +# + +# For further details see: +# - github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md +# - $TVM_HOME/python/tvm/relay/op/strategy/x86.py diff --git a/include/tvm/relay/transform.h b/include/tvm/relay/transform.h index 6b26b9eff474..b37d0f83adf3 100644 --- a/include/tvm/relay/transform.h +++ b/include/tvm/relay/transform.h @@ -509,7 +509,6 @@ TVM_DLL Pass SimplifyExpr(); * * \param config All available targets. * - * * \return The pass. */ TVM_DLL Pass RelayToTIRTargetHook(CompilationConfig config); diff --git a/python/tvm/relay/backend/contrib/uma/api/codegen.py b/python/tvm/relay/backend/contrib/uma/api/codegen.py index 8ba17d30de8d..8bbb77c91b44 100644 --- a/python/tvm/relay/backend/contrib/uma/api/codegen.py +++ b/python/tvm/relay/backend/contrib/uma/api/codegen.py @@ -28,9 +28,20 @@ class UMACodegen(object): def __init__(self, target_name: str) -> None: self.target_name = target_name - def _register_codegen(self, fmt: str = "c", **kwargs) -> None: + def _register_codegen( + self, fmt: str = "c", includes: Optional[Callable[[], str]] = None, **kwargs + ) -> None: + """Registration codegen in UMA. + + Parameters + ---------- + fmt: str + format of codegen. Currently only "c" is supported. + includes : OptionalCallable[[], str]] + user-defined function that adds C-#include statement to UMA C-Code. + """ if fmt == "c": - self._register_c_codegen(**kwargs) + self._register_c_codegen(includes, **kwargs) else: raise RuntimeError(f'Unsupported codegen format "{fmt}"') diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index f7178bf30580..dc85e4a6bd90 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -24,23 +24,24 @@ from . 
import _ffi_api from .utils import PassPhase +OperatorStrategies = List[ + Tuple[ + str, + Callable[ + [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], + tvm.relay.op.op.OpStrategy, + ], + Optional[int], + ] +] + class UMALower: """Lowering base class of the Universal Modular Accelerator Interface (UMA).""" def __init__(self, target_name: str) -> None: self.target_name = target_name - - self._operator_strategies: List[ - Tuple[ - str, - Callable[ - [tvm.ir.Attrs, tvm.ir.Array, tvm.ir.TensorType, tvm.target.Target], - tvm.relay.op.op.OpStrategy, - ], - Optional[int], - ] - ] = [] + self._operator_strategies: OperatorStrategies = [] self._tir_passes: List[Tuple[PassPhase, tvm.tir.transform.PrimFuncPass]] = [] def _lower_relay_to_tir(self, relay_prim_func: relay.Function) -> tvm.tir.PrimFunc: diff --git a/python/tvm/relay/backend/contrib/uma/backend.py b/python/tvm/relay/backend/contrib/uma/backend.py index 2586a41b3961..40ec06e45367 100644 --- a/python/tvm/relay/backend/contrib/uma/backend.py +++ b/python/tvm/relay/backend/contrib/uma/backend.py @@ -272,8 +272,16 @@ def gen_includes() -> str: # Backend functions def register(self) -> None: + """ + Registering UMABackend: + registering target attributes, relay_to_relay, relay_to_tir and tir_to_runtime + """ registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") + for name, attr in self._target_attrs: + if attr is None: + raise ValueError("Target attribute None is not supported.") + if registration_func(self.target_name, self._target_attrs): self._relay_to_relay.register() self._relay_to_tir.register() diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index a2377c726d99..fb12fce9df17 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -42,6 +42,7 @@ namespace relay { namespace contrib { namespace uma { +//TODO(@mjklaiber, @manupa-arm, @areusch) move this to include /*! * \brief This mutator outlines functions that are marked with a named * "Compiler" attribute. Functions that do not match this condition remain diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index e17feb82d2d5..fd887a0304dc 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -70,8 +70,8 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") } else if (default_value->IsInstance()) { target_kind.add_attr_option(option_name, Downcast(default_value)); } else { - LOG(FATAL) << "Attribute option of type " << attr_option.second->GetTypeKey() - << " can not be added. Only String, Integer, or Bool are supported."; + LOG(FATAL) << "Only String, Integer, or Bool are supported. 
Given attribute option type: " + << attr_option.second->GetTypeKey(); } } return true; diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index e575e08464fd..1f78744dcad2 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -40,9 +40,10 @@ class UMACodegen : public codegen::CodeGenCHost { void Init(bool output_ssa, bool emit_asserts) { auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_); - ICHECK(includes_pf); - String includes = (*includes_pf)(); - decl_stream << includes; + if (includes_pf){ + String includes = (*includes_pf)(); + decl_stream << includes; + } std::unordered_set devices; devices.insert(target_str_); CodeGenCHost::Init(output_ssa, emit_asserts, target_str_, devices); diff --git a/tests/python/contrib/test_uma/test_target.py b/tests/python/contrib/test_uma/test_target.py index bb57e1ae4fa2..558c4e518230 100644 --- a/tests/python/contrib/test_uma/test_target.py +++ b/tests/python/contrib/test_uma/test_target.py @@ -14,9 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import Union import pytest import tvm +from tests.python.contrib.test_uma.test_uma_vanilla_accelerator import VanillaAcceleratorBackend from tvm.relay.backend.contrib.uma import uma_available pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") @@ -56,5 +58,28 @@ def test_uma_target(target_name, target_attrs, target_args): assert my_target.attrs[attr] == target_args[attr] +@pytest.mark.parametrize( + "attr_name, target_attr", + [ + ("float_attr", 3.14), + ("none_attr", None), + ], +) +def test_invalid_attr_option(attr_name: str, target_attr: Union[str, int, bool, float, None]): + if target_attr is None: + # None cannot be caught as TVMError, as it causes a SIGKILL, therefore it must be prevented to be + # entered into relay.backend.contrib.uma.RegisterTarget at Python level. + with pytest.raises(ValueError): + uma_backend = VanillaAcceleratorBackend() + uma_backend._target_attrs = {attr_name: target_attr} + uma_backend.register() + else: + registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget") + target_name = f"{attr_name}_{target_attr}" + target_attr = {attr_name: target_attr} + with pytest.raises(tvm.TVMError, match=r"Only String, Integer, or Bool are supported. .*"): + registration_func(target_name, target_attr) + + if __name__ == "__main__": tvm.testing.main() From cd536cc932c0ddf078af7e9d9a231f814db6f123 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 5 Aug 2022 21:18:57 +0200 Subject: [PATCH 110/112] [UMA] cc lint --- apps/uma/_template/conv2dnchw.cc | 2 +- src/relay/backend/contrib/uma/relay_to_tir.cc | 2 +- src/relay/backend/contrib/uma/targets.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/uma/_template/conv2dnchw.cc b/apps/uma/_template/conv2dnchw.cc index 03b401ec29ea..bfb4300e2aa3 100644 --- a/apps/uma/_template/conv2dnchw.cc +++ b/apps/uma/_template/conv2dnchw.cc @@ -18,7 +18,7 @@ */ #include -//TODO(mjklaiber): leverage pragma import_c in the future +// TODO(mjklaiber): leverage pragma import_c in the future #ifdef __cplusplus extern "C" #endif diff --git a/src/relay/backend/contrib/uma/relay_to_tir.cc b/src/relay/backend/contrib/uma/relay_to_tir.cc index fb12fce9df17..8aed69453158 100644 --- a/src/relay/backend/contrib/uma/relay_to_tir.cc +++ b/src/relay/backend/contrib/uma/relay_to_tir.cc @@ -42,7 +42,7 @@ namespace relay { namespace contrib { namespace uma { -//TODO(@mjklaiber, @manupa-arm, @areusch) move this to include +// TODO(@mjklaiber, @manupa-arm, @areusch) move this to include /*! * \brief This mutator outlines functions that are marked with a named * "Compiler" attribute. Functions that do not match this condition remain diff --git a/src/relay/backend/contrib/uma/targets.cc b/src/relay/backend/contrib/uma/targets.cc index fd887a0304dc..a17f6694f79f 100644 --- a/src/relay/backend/contrib/uma/targets.cc +++ b/src/relay/backend/contrib/uma/targets.cc @@ -70,8 +70,8 @@ TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget") } else if (default_value->IsInstance()) { target_kind.add_attr_option(option_name, Downcast(default_value)); } else { - LOG(FATAL) << "Only String, Integer, or Bool are supported. Given attribute option type: " - << attr_option.second->GetTypeKey(); + LOG(FATAL) << "Only String, Integer, or Bool are supported. Given attribute option type: " + << attr_option.second->GetTypeKey(); } } return true; From 0418ad886f7bc320f6a709e80e0b6160a8a76f44 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Klaiber" Date: Fri, 5 Aug 2022 21:19:06 +0200 Subject: [PATCH 111/112] [UMA] cc lint --- src/relay/backend/contrib/uma/tir_to_runtime.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/uma/tir_to_runtime.cc b/src/relay/backend/contrib/uma/tir_to_runtime.cc index 1f78744dcad2..4b5cd4332476 100644 --- a/src/relay/backend/contrib/uma/tir_to_runtime.cc +++ b/src/relay/backend/contrib/uma/tir_to_runtime.cc @@ -40,9 +40,9 @@ class UMACodegen : public codegen::CodeGenCHost { void Init(bool output_ssa, bool emit_asserts) { auto includes_pf = tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_); - if (includes_pf){ - String includes = (*includes_pf)(); - decl_stream << includes; + if (includes_pf) { + String includes = (*includes_pf)(); + decl_stream << includes; } std::unordered_set devices; devices.insert(target_str_); From 0f0b1bf108066a1c5dd4c08844898b4601e60b82 Mon Sep 17 00:00:00 2001 From: Christoph Gerum Date: Fri, 5 Aug 2022 22:32:32 +0200 Subject: [PATCH 112/112] Use better function name for te_lowering and annotate current target at TE functions --- python/tvm/relay/backend/contrib/uma/api/lower.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/backend/contrib/uma/api/lower.py b/python/tvm/relay/backend/contrib/uma/api/lower.py index dc85e4a6bd90..34630949a151 100644 --- a/python/tvm/relay/backend/contrib/uma/api/lower.py +++ b/python/tvm/relay/backend/contrib/uma/api/lower.py @@ -82,15 +82,20 @@ def _get_tensors(te_cached_func): return args + outputs - f = tvm._ffi.get_global_func("relay.backend.LowerToTE") - te_cached_func = f(relay_prim_func) + lower_to_te = tvm._ffi.get_global_func("relay.backend.LowerToTE") + te_cached_func = lower_to_te(relay_prim_func) x = _get_tensors(te_cached_func) tir_prim_func = te.create_prim_func(x) tir_prim_func = tir_prim_func.with_attr( "global_symbol", relay_prim_func.attrs["global_symbol"] ) - # TODO: The target should probably come from somewhere else instead of being created here. - tir_prim_func = tir_prim_func.with_attr("target", tvm.target.Target(self.target_name)) + + compiler_attr = relay_prim_func.attrs["Compiler"] + target = tvm.target.Target.current() + if target.kind.name != compiler_attr: + target = tvm.target.Target(compiler_attr) + + tir_prim_func = tir_prim_func.with_attr("target", target) tir_prim_func = tir_prim_func.with_attr("relay_attrs", relay_prim_func.attrs) return tir_prim_func