diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
index 62265eab111f0..0c4ca139c6318 100644
--- a/python/tvm/__init__.py
+++ b/python/tvm/__init__.py
@@ -47,10 +47,9 @@
 
 # tvm.target
 from . import target
-from .target import build_config
 
 # tvm.te
-from .te import decl_tensor_intrin, create_schedule, tag_scope
+from . import te
 
 # tvm.testing
 from . import testing
@@ -64,10 +63,6 @@
 # others
 from . import arith
 
-# backward compact for topi, to be removed later
-from .tir import expr, stmt, ir_builder, ir_pass, generic
-from .te import tensor, schedule
-
 # Contrib initializers
 from .contrib import rocm as _rocm, nvcc as _nvcc, sdaccel as _sdaccel
 
diff --git a/python/tvm/autotvm/measure/measure_methods.py b/python/tvm/autotvm/measure/measure_methods.py
index 44e6de934649c..698ddbc68dd70 100644
--- a/python/tvm/autotvm/measure/measure_methods.py
+++ b/python/tvm/autotvm/measure/measure_methods.py
@@ -33,9 +33,13 @@
 
 import numpy as np
 
-from ... import ir_pass, build, build_config, nd, TVMError, register_func, \
-    rpc as _rpc, target as _target
-from ...contrib import nvcc, ndk, tar
+import tvm._ffi
+from tvm import nd, rpc as _rpc, target as _target
+from tvm.tir import ir_pass
+from tvm.error import TVMError
+from tvm.target import build_config
+from tvm.driver import build
+from tvm.contrib import nvcc, ndk, tar
 
 from ..util import get_const_tuple
 from ..env import AutotvmGlobalScope
@@ -581,7 +585,7 @@ def _check():
     return not t.is_alive()
 
 
-@register_func
+@tvm._ffi.register_func
 def tvm_callback_cuda_compile(code):
     """use nvcc to generate ptx code for better optimization"""
     curr_cuda_target_arch = AutotvmGlobalScope.current.cuda_target_arch
diff --git a/python/tvm/autotvm/task/code_hash.py b/python/tvm/autotvm/task/code_hash.py
index d5358ec437e4d..3076970f84c96 100644
--- a/python/tvm/autotvm/task/code_hash.py
+++ b/python/tvm/autotvm/task/code_hash.py
@@ -22,7 +22,7 @@
 import inspect
 import zlib
 
-from tvm import schedule
+from tvm.te import schedule
 
 def attach_code_hash(s):
     """Decorator for attaching a code hash to a schedule
diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py
index 1a381069112e8..e1c09133eb234 100644
--- a/python/tvm/autotvm/task/topi_integration.py
+++ b/python/tvm/autotvm/task/topi_integration.py
@@ -28,8 +28,8 @@
 """
 import tvm.te._ffi_api
 from tvm import target as _target
+from tvm.te import tensor
 
-from ... import tensor
 from .task import args_to_workload, DispatchContext, \
     register_task_compute, register_task_schedule, serialize_args
 
diff --git a/python/tvm/autotvm/util.py b/python/tvm/autotvm/util.py
index 54001d3338ad7..01d50e86a88a3 100644
--- a/python/tvm/autotvm/util.py
+++ b/python/tvm/autotvm/util.py
@@ -24,7 +24,7 @@
 
 import numpy as np
 
-from .. import expr, ir_pass
+from tvm.tir import expr, ir_pass
 
 logger = logging.getLogger('autotvm')
 
diff --git a/python/tvm/contrib/peak.py b/python/tvm/contrib/peak.py
index 1d987a5aeea49..2906410efc405 100644
--- a/python/tvm/contrib/peak.py
+++ b/python/tvm/contrib/peak.py
@@ -208,7 +208,7 @@ def measure_compute_mad(total_item, item_per_thread, base_type, bits, lanes,
     def extern(ins, outs):
         # pylint: disable=unused-argument
         """construct measurement function by building IR directly"""
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
 
         bx = te.thread_axis("blockIdx.x")
         tx = te.thread_axis("threadIdx.x")
diff --git a/python/tvm/contrib/tedd.py b/python/tvm/contrib/tedd.py
index f15b7d489eeed..68e15f2b1ddd8 100644
--- a/python/tvm/contrib/tedd.py
+++ b/python/tvm/contrib/tedd.py
@@ -282,7 +282,7 @@ def get_leaf_itervar_index(itervar, leaf_iv):
     def encode_itervar_relation(obj_manager, rel):
         """Extract and encode IterVar Relationship visualization data to a dictionary"""
         rel_type = type(rel)
-        if rel_type is tvm.schedule.Split:
+        if rel_type is tvm.te.schedule.Split:
             node_type = 'Split_Relation'
             rel_dict = {
                 "type": node_type,
@@ -290,7 +290,7 @@ def encode_itervar_relation(obj_manager, rel):
                 "outer": obj_manager.get_dom_path(rel.outer),
                 "inner": obj_manager.get_dom_path(rel.inner),
             }
-        elif rel_type is tvm.schedule.Fuse:
+        elif rel_type is tvm.te.schedule.Fuse:
             node_type = 'Fuse_Relation'
             rel_dict = {
                 "type": node_type,
@@ -298,7 +298,7 @@ def encode_itervar_relation(obj_manager, rel):
                 "outer": obj_manager.get_dom_path(rel.outer),
                 "inner": obj_manager.get_dom_path(rel.inner),
             }
-        elif rel_type is tvm.schedule.Singleton:
+        elif rel_type is tvm.te.schedule.Singleton:
             node_type = 'Singleton_Relation'
             rel_dict = {
                 "type": node_type,
@@ -377,12 +377,12 @@ def encode_schedule(sch, need_range):
             dict : dictionary
                 A nested dictionary
         """
-        assert isinstance(sch, tvm.schedule.Schedule
-                          ), 'Input is not a tvm.schedule.Schedule object.'
+        assert isinstance(sch, tvm.te.schedule.Schedule
+                          ), 'Input is not a tvm.te.schedule.Schedule object.'
         range_map = None
         if need_range:
             try:
-                range_map = tvm.schedule.InferBound(sch)
+                range_map = tvm.te.schedule.InferBound(sch)
             except tvm._ffi.base.TVMError as expt:
                 warnings.warn(
                     'Ranges are not available, because InferBound fails with the following error:\n'
diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py
index 336db833e24bf..67eb22414abdc 100644
--- a/python/tvm/driver/build_module.py
+++ b/python/tvm/driver/build_module.py
@@ -89,7 +89,7 @@ def form_body(sch):
     """According to the given schedule, form the raw body
     Parameters
     ----------
-    sch : tvm.schedule.Schedule
+    sch : tvm.te.schedule.Schedule
     The given scheduler to form the raw body
 
     Returns
@@ -113,7 +113,7 @@ def lower(sch,
 
     Parameters
     ----------
-    sch : tvm.schedule.Schedule
+    sch : tvm.te.schedule.Schedule
         The schedule to be built
 
     args : list of Buffer or Tensor or Var
@@ -286,7 +286,7 @@ def build(inputs,
 
     Parameters
     ----------
-    inputs : tvm.Schedule, LoweredFunc, or dict of target to LoweredFunc list
+    inputs : tvm.te.Schedule, LoweredFunc, or dict of target to LoweredFunc list
         The schedule to be built
 
     args : list of Buffer or Tensor or Var, optional
@@ -328,7 +328,7 @@ def build(inputs,
         A = te.placeholder((n,), name='A')
         B = te.placeholder((n,), name='B')
         C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
-        s = tvm.create_schedule(C.op)
+        s = tvm.te.create_schedule(C.op)
         f = tvm.lower(s, [A, B, C], name="test_add")
         m = tvm.build(f, target="llvm")
 
@@ -340,7 +340,7 @@ def build(inputs,
         A = te.placeholder((n,), name='A')
         B = te.placeholder((n,), name='B')
         C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
-        s1 = tvm.create_schedule(C.op)
+        s1 = tvm.te.create_schedule(C.op)
         with tvm.target.cuda() as cuda_tgt:
           s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
           f1 = tvm.lower(s1, [A, B, C], name="test_add1")
diff --git a/python/tvm/hybrid/util.py b/python/tvm/hybrid/util.py
index dbdfaec716dc0..6c019893bf20a 100644
--- a/python/tvm/hybrid/util.py
+++ b/python/tvm/hybrid/util.py
@@ -72,7 +72,7 @@ def _pruned_source(func):
 def replace_io(body, rmap):
     """Replacing tensors usage according to the dict given"""
     # pylint: disable=import-outside-toplevel
-    from .. import ir_pass
+    from tvm.tir import ir_pass
 
     def replace(op):
         if isinstance(op, _stmt.Provide) and op.func in rmap.keys():
diff --git a/python/tvm/relay/backend/_backend.py b/python/tvm/relay/backend/_backend.py
index cbc4079b9e619..df0347bd2bae2 100644
--- a/python/tvm/relay/backend/_backend.py
+++ b/python/tvm/relay/backend/_backend.py
@@ -26,7 +26,7 @@ def lower(sch, inputs, func_name, source_func):
 
     Parameters
     ----------
-    sch : tvm.Schedule
+    sch : tvm.te.Schedule
         The schedule.
 
     inputs : List[tvm.te.Tensor]
diff --git a/python/tvm/relay/backend/compile_engine.py b/python/tvm/relay/backend/compile_engine.py
index f3a0c01a661d7..a51e4f7bad115 100644
--- a/python/tvm/relay/backend/compile_engine.py
+++ b/python/tvm/relay/backend/compile_engine.py
@@ -80,11 +80,11 @@ def get_shape(shape):
     """Convert the shape to correct dtype and vars."""
     ret = []
     for dim in shape:
-        if isinstance(dim, tvm.expr.IntImm):
+        if isinstance(dim, tvm.tir.IntImm):
             val = int(dim)
             assert val <= np.iinfo(np.int32).max
-            ret.append(tvm.expr.IntImm("int32", val))
-        elif isinstance(dim, tvm.expr.Any):
+            ret.append(tvm.tir.IntImm("int32", val))
+        elif isinstance(dim, tvm.tir.Any):
             ret.append(te.var("any_dim", "int32"))
         else:
             ret.append(dim)
@@ -130,7 +130,7 @@ def get_valid_implementations(op, attrs, inputs, out_type, target):
             flag = True
             for clause in spec.condition.clauses:
                 clause = analyzer.canonical_simplify(clause)
-                if isinstance(clause, tvm.expr.IntImm) and clause.value:
+                if isinstance(clause, tvm.tir.IntImm) and clause.value:
                     continue
                 flag = False
                 break
diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_runtime_codegen.py
index f58a9b0d5ccd5..762210dbe4288 100644
--- a/python/tvm/relay/backend/graph_runtime_codegen.py
+++ b/python/tvm/relay/backend/graph_runtime_codegen.py
@@ -36,7 +36,7 @@
 from tvm.runtime.ndarray import empty
 from tvm.relay import _build_module
 from tvm import target as _target
-from tvm import expr as _expr
+from tvm.tir import expr as _expr
 
 class GraphRuntimeCodegen(object):
     """The compiler from Relay to the TVM runtime system."""
diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py
index 6d9c850cb7ffa..22e0b916e69ac 100644
--- a/python/tvm/relay/build_module.py
+++ b/python/tvm/relay/build_module.py
@@ -23,7 +23,7 @@
 
 from tvm.ir import IRModule
 
-from tvm import expr as tvm_expr
+from tvm.tir import expr as tvm_expr
 from .. import nd as _nd, target as _target, autotvm
 from ..contrib import graph_runtime as _graph_rt
 from . import _build_module
diff --git a/python/tvm/relay/op/op.py b/python/tvm/relay/op/op.py
index d2a05af0d978b..6be7d4d4f870f 100644
--- a/python/tvm/relay/op/op.py
+++ b/python/tvm/relay/op/op.py
@@ -183,7 +183,7 @@ def schedule(self, attrs, outs, target):
 
         Returns
         -------
-        schedule : tvm.Schedule
+        schedule : tvm.te.Schedule
             The schedule.
         """
         return _OpImplementationSchedule(self, attrs, outs, target)
diff --git a/python/tvm/target/build_config.py b/python/tvm/target/build_config.py
index 8782d24d2da97..c105175d3e261 100644
--- a/python/tvm/target/build_config.py
+++ b/python/tvm/target/build_config.py
@@ -35,7 +35,7 @@ class DumpIR(object):
     -----------
     .. code-block:: python
 
-        with tvm.build_config(dump_pass_ir=True)
+        with tvm.target.build_config(dump_pass_ir=True)
             run()
     """
     scope_level = 0
diff --git a/python/tvm/te/tag.py b/python/tvm/te/tag.py
index 78c89402d8ee4..1022875ce3dda 100644
--- a/python/tvm/te/tag.py
+++ b/python/tvm/te/tag.py
@@ -80,11 +80,11 @@ def tag_scope(tag):
         B = te.placeholder((m, l), name='B')
         k = te.reduce_axis((0, l), name='k')
 
-        with tvm.tag_scope(tag='matmul'):
+        with tvm.te.tag_scope(tag='matmul'):
             C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k))
 
         # or use tag_scope as decorator
-        @tvm.tag_scope(tag="conv")
+        @tvm.te.tag_scope(tag="conv")
         def compute_relu(data):
             return te.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i)))
     """
diff --git a/python/tvm/tir/generic.py b/python/tvm/tir/generic.py
index 8a9cf8eeb50d4..88be5b1dfd640 100644
--- a/python/tvm/tir/generic.py
+++ b/python/tvm/tir/generic.py
@@ -16,7 +16,7 @@
 # under the License.
 """Generic opertors in TVM.
 We follow the numpy naming convention for this interface
-(e.g., tvm.generic.multitply ~ numpy.multiply).
+(e.g., tvm.tir.generic.multitply ~ numpy.multiply).
 The default implementation is used by tvm.ExprOp.
 """
 # pylint: disable=unused-argument
diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py
index 6e6b1128e6cf7..885b8475082ed 100644
--- a/python/tvm/tir/ir_builder.py
+++ b/python/tvm/tir/ir_builder.py
@@ -98,7 +98,7 @@ class IRBuilder(object):
     --------
     .. code-block:: python
 
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         n = te.var("n")
         A = ib.allocate("float32", n, name="A")
         with ib.for_range(0, n, name="i") as i:
@@ -158,7 +158,7 @@ def scope_attr(self, node, attr_key, value):
         --------
         .. code-block:: python
 
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             i = te.var("i")
             x = ib.pointer("float32")
             ib.scope_attr(x, "storage_scope", "global")
@@ -200,7 +200,7 @@ def for_range(self, begin, end, name="i", dtype="int32", for_type="serial"):
         --------
         .. code-block:: python
 
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             x = ib.pointer("float32")
             with ib.for_range(1, 10, name="i") as i:
                 x[i] = x[i - 1] + 1
@@ -243,7 +243,7 @@ def if_scope(self, cond):
         --------
         .. code-block:: python
 
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             i = te.var("i")
             x = ib.pointer("float32")
             with ib.if_scope((i % 2) == 0):
@@ -268,7 +268,7 @@ def else_scope(self):
         --------
         .. code-block:: python
 
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             i = te.var("i")
             x = ib.pointer("float32")
             with ib.if_scope((i % 2) == 0):
diff --git a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py
index a04e2b80b6618..bf7e60a1df6e2 100755
--- a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py
+++ b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py
@@ -29,7 +29,7 @@ def main():
     A = te.placeholder((n,), name='A')
     B = te.placeholder((n,), name='B')
     C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
-    s = tvm.create_schedule(C.op)
+    s = tvm.te.create_schedule(C.op)
     s[C].parallel(s[C].op.axis[0])
     print(tvm.lower(s, [A, B, C], simple_mode=True))
     tvm.build(s, [A, B, C], 'llvm --system-lib').save(osp.join(sys.argv[1], 'test.o'))
diff --git a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py
index c3e397d12ace8..cb7353ff70abf 100755
--- a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py
+++ b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py
@@ -30,7 +30,7 @@ def main():
     A = te.placeholder((n,), name='A')
     B = te.placeholder((n,), name='B')
     C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
-    s = tvm.create_schedule(C.op)
+    s = tvm.te.create_schedule(C.op)
     s[C].parallel(s[C].op.axis[0])
     print(tvm.lower(s, [A, B, C], simple_mode=True))
     obj_file = osp.join(sys.argv[1], 'test.o')
diff --git a/tests/python/integration/test_dot.py b/tests/python/integration/test_dot.py
index e7dc7e95eb2bb..c66e596ef50cd 100644
--- a/tests/python/integration/test_dot.py
+++ b/tests/python/integration/test_dot.py
@@ -30,11 +30,11 @@ def lower(s, args, name="mydot"):
     s = s.normalize()
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 16)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    fapi = tvm.ir_pass.MakeAPI(stmt, name, arg_list, 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 16)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, name, arg_list, 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     return fapi
 
 
diff --git a/tests/python/relay/test_pass_fold_constant.py b/tests/python/relay/test_pass_fold_constant.py
index 5e1cd8d8b69df..cc362a266aa50 100644
--- a/tests/python/relay/test_pass_fold_constant.py
+++ b/tests/python/relay/test_pass_fold_constant.py
@@ -55,7 +55,7 @@ def fail(x):
         raise RuntimeError()
 
     # the fold constant should work on any context.
-    with tvm.build_config(add_lower_pass=[(0, fail)]):
+    with tvm.target.build_config(add_lower_pass=[(0, fail)]):
         with tvm.target.create("cuda"):
             zz = run_opt_pass(before(), transform.FoldConstant())
     zexpected = run_opt_pass(expected(), transform.InferType())
diff --git a/tests/python/unittest/test_arith_canonical_simplify.py b/tests/python/unittest/test_arith_canonical_simplify.py
index 3d17bf1ad626b..b4649a4ba75ee 100644
--- a/tests/python/unittest/test_arith_canonical_simplify.py
+++ b/tests/python/unittest/test_arith_canonical_simplify.py
@@ -23,7 +23,7 @@ def __init__(self):
 
     def verify(self, data, expected):
         res = self.analyzer.canonical_simplify(data)
-        assert tvm.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected)
+        assert tvm.tir.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected)
 
 
 def test_mul_sum_simplify():
@@ -197,7 +197,7 @@ def test_reduce_combiner_simplify():
 
         # Check that the remaining components are the expected ones.
         for lhs, rhs in zip(simplified.source, reference_simplified_sources[j]):
-            assert tvm.ir_pass.Equal(lhs, rhs)
+            assert tvm.tir.ir_pass.Equal(lhs, rhs)
 
     # Test that components with side effects are not removed
     side_effect = lambda *xs: tvm.tir.Call("int32", "dummy", xs, tvm.tir.Call.Intrinsic, None, 0)
diff --git a/tests/python/unittest/test_arith_deduce_bound.py b/tests/python/unittest/test_arith_deduce_bound.py
index 3d5a3298f19eb..5baabd16c615b 100644
--- a/tests/python/unittest/test_arith_deduce_bound.py
+++ b/tests/python/unittest/test_arith_deduce_bound.py
@@ -19,7 +19,7 @@
 
 
 def assert_expr_equal(a, b):
-    res =  tvm.ir_pass.Simplify(a - b)
+    res =  tvm.tir.ir_pass.Simplify(a - b)
     equal = isinstance(res, tvm.tir.IntImm) and res.value == 0
     if not equal:
         raise ValueError("{} and {} are not equal".format(a, b))
@@ -83,10 +83,10 @@ def test_deduce():
     e3 = (-b)+a*c-d
     res3 = tvm.arith.deduce_bound(a, e3>=0, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s})
     ans3 = fdiv(2,c)+1
-    assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3)
+    assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3)
 
     res3 = tvm.arith.deduce_bound(a, zero <= e3, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s})
-    assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3)
+    assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3)
 
     # tests for `EQ` op
     res4 = tvm.arith.deduce_bound(a, a == b, {}, {})
@@ -158,21 +158,21 @@ def test_basic(a1, a2, coff):
 
         res1 = tvm.arith.deduce_bound(a, e0<17, {b: b_s}, {b: b_s})
         [x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1
+        assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1
 
         # expression containing variable a is on rhs
         res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32") < e0, {b: b_s}, {b: b_s})
         [x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1
+        assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1
 
         # expression containing variable a is on rhs
         res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32")>= e0, {b: b_s}, {b: b_s})
         [x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1
+        assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1
 
         res1 = tvm.arith.deduce_bound(a, e0>=17, {b: b_s}, {b: b_s})
         [x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1
+        assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1
 
     test_basic(0, 4, 4)
     test_basic(1, 5, 4)
@@ -190,21 +190,21 @@ def test_complex(a1, a2, coff):
 
         res1 = tvm.arith.deduce_bound(a, e0<63, {b: b_s}, {b: b_s})
         [t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1
+        assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1
 
         # expression containing variable a is on rhs
         res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32")>= e0, {b: b_s}, {b: b_s})
         [t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1
+        assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1
 
         res1 = tvm.arith.deduce_bound(a, e0>63, {b: b_s}, {b: b_s})
         [t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1
+        assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1
 
         # expression containing variable a is on rhs
         res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32") <= e0, {b: b_s}, {b: b_s})
         [t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
-        assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1
+        assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1
 
     test_complex(0, 4, 4)
     test_complex(0, 4, -4)
diff --git a/tests/python/unittest/test_arith_detect_clip_bound.py b/tests/python/unittest/test_arith_detect_clip_bound.py
index 217169f194e57..d6953713f14b4 100644
--- a/tests/python/unittest/test_arith_detect_clip_bound.py
+++ b/tests/python/unittest/test_arith_detect_clip_bound.py
@@ -23,15 +23,15 @@ def test_basic():
     c = te.var("c")
     m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6,
                                           a - 1 > 0), [a])
-    assert tvm.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0
     assert m[0].value == 2
     m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6,
                                           a - 1 > 0), [a, b])
     assert len(m) == 0
     m = tvm.arith.detect_clip_bound(tvm.tir.all(a + 10 * c <= 20,
                                           b - 1 > 0), [a, b])
-    assert tvm.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0
-    assert tvm.ir_pass.Simplify(m[2] - 2).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[2] - 2).value == 0
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_arith_detect_linear_equation.py b/tests/python/unittest/test_arith_detect_linear_equation.py
index 6a80bf08899cd..c6e6b753a6926 100644
--- a/tests/python/unittest/test_arith_detect_linear_equation.py
+++ b/tests/python/unittest/test_arith_detect_linear_equation.py
@@ -22,14 +22,14 @@ def test_basic():
     b = te.var("b")
     m = tvm.arith.detect_linear_equation(a * 4 + b * 6 + 7, [a])
     assert m[0].value == 4
-    assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0
 
     m = tvm.arith.detect_linear_equation(a * 4 * (a+1) + b * 6 + 7, [a])
     assert len(m) == 0
 
     m = tvm.arith.detect_linear_equation(a * 4  + (a+1) + b * 6 + 7, [a])
     assert m[0].value == 5
-    assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0
 
     m = tvm.arith.detect_linear_equation(a * b + 7, [a])
     assert m[0] == b
@@ -39,13 +39,13 @@ def test_basic():
 
     m = tvm.arith.detect_linear_equation(b * 7, [])
     assert len(m) == 1
-    assert tvm.ir_pass.Simplify(m[0] - b * 7).value == 0
+    assert tvm.tir.ir_pass.Simplify(m[0] - b * 7).value == 0
 
 def test_multivariate():
     v = [te.var("v%d" % i) for i in range(4)]
     b = te.var("b")
     m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8, v)
-    assert(tvm.ir_pass.Equal(tvm.ir_pass.Simplify(m[0]), b + 5))
+    assert(tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(m[0]), b + 5))
     assert(m[1].value == 8)
 
     m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8 * v[2], v)
@@ -61,11 +61,11 @@ def test_multivariate():
 
     m = tvm.arith.detect_linear_equation((v[0] - v[1]), [v[2]])
     assert(m[0].value == 0)
-    assert(tvm.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0)
+    assert(tvm.tir.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0)
 
     m = tvm.arith.detect_linear_equation((v[0] - v[1]), [])
     assert(len(m) == 1)
-    assert(tvm.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0)
+    assert(tvm.tir.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0)
 
 if __name__ == "__main__":
     test_basic()
diff --git a/tests/python/unittest/test_arith_intset.py b/tests/python/unittest/test_arith_intset.py
index f248ef082651a..8352d9cf22ddf 100644
--- a/tests/python/unittest/test_arith_intset.py
+++ b/tests/python/unittest/test_arith_intset.py
@@ -28,7 +28,7 @@ def err_msg():
             return "\ndata={}\ndmap={}\nres={}\nexpected={}".format(data, dmap, res, expected)
         def equal(x, y):
             res = self.analyzer.canonical_simplify(x - y)
-            return tvm.ir_pass.Equal(res, 0)
+            return tvm.tir.ir_pass.Equal(res, 0)
         assert equal(res.min_value, expected[0]), err_msg()
         assert equal(res.max_value, expected[1]), err_msg()
 
diff --git a/tests/python/unittest/test_arith_rewrite_simplify.py b/tests/python/unittest/test_arith_rewrite_simplify.py
index 9f58ef4506fa8..c8c3b0bd9a3b6 100644
--- a/tests/python/unittest/test_arith_rewrite_simplify.py
+++ b/tests/python/unittest/test_arith_rewrite_simplify.py
@@ -23,7 +23,7 @@ def __init__(self):
 
     def verify(self, data, expected):
         res = self.analyzer.rewrite_simplify(data)
-        assert tvm.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected)
+        assert tvm.tir.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected)
 
 
 def test_vector_simplify():
diff --git a/tests/python/unittest/test_arith_stmt_simplify.py b/tests/python/unittest/test_arith_stmt_simplify.py
index 12a60db855f73..45f083342410b 100644
--- a/tests/python/unittest/test_arith_stmt_simplify.py
+++ b/tests/python/unittest/test_arith_stmt_simplify.py
@@ -18,7 +18,7 @@
 from tvm import te
 
 def test_stmt_simplify():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     C = ib.pointer("float32", name="C")
     n = te.size_var("n")
@@ -27,12 +27,12 @@ def test_stmt_simplify():
             A[i] = C[i]
 
     body = tvm.tir.LetStmt(n, 10, ib.get())
-    body = tvm.ir_pass.CanonicalSimplify(body)
+    body = tvm.tir.ir_pass.CanonicalSimplify(body)
     assert isinstance(body.body, tvm.tir.Store)
 
 
 def test_thread_extent_simplify():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     C = ib.pointer("float32", name="C")
     n = te.size_var("n")
@@ -44,7 +44,7 @@ def test_thread_extent_simplify():
     with ib.if_scope(tx + ty < 12):
         A[tx] = C[tx + ty]
     body = tvm.tir.LetStmt(n, 10, ib.get())
-    body = tvm.ir_pass.CanonicalSimplify(body)
+    body = tvm.tir.ir_pass.CanonicalSimplify(body)
     assert isinstance(body.body.body.body, tvm.tir.Store)
 
 
diff --git a/tests/python/unittest/test_codegen_c_host.py b/tests/python/unittest/test_codegen_c_host.py
index daf5b0eec5e2a..1604ffb2293b6 100644
--- a/tests/python/unittest/test_codegen_c_host.py
+++ b/tests/python/unittest/test_codegen_c_host.py
@@ -74,8 +74,8 @@ def check_c():
         binds = {A : Ab}
         # BUILD and invoke the kernel.
         f1 = tvm.lower(s, [A,B,C], name="fadd_pipeline")
-        fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
-        fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+        fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(f1)]
+        fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0])
         mhost = tvm.target.codegen.build_module(fsplits[0], "c")
         temp = util.tempdir()
         path_dso = temp.relpath("temp.so")
@@ -92,7 +92,7 @@ def check_c():
         tvm.testing.assert_allclose(
             c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-    with tvm.build_config(offset_factor=4):
+    with tvm.target.build_config(offset_factor=4):
         check_c()
 
 
diff --git a/tests/python/unittest/test_codegen_cuda.py b/tests/python/unittest/test_codegen_cuda.py
index 27968c69380cd..f94d8c38e3a63 100644
--- a/tests/python/unittest/test_codegen_cuda.py
+++ b/tests/python/unittest/test_codegen_cuda.py
@@ -198,9 +198,9 @@ def vectorizer(op):
                 new_b = tvm.tir.Shuffle(bs, ids)
                 return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones)
             return None
-        return tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
+        return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
 
-    with tvm.build_config(add_lower_pass=[(1, my_vectorize)]):
+    with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]):
         module = tvm.build(sch, [a, b, c], target='cuda')
         a_ = np.array(list(range(64)), dtype='int32')
         b_ = np.array((list(range(4))[::-1]) * 16, dtype='int32')
diff --git a/tests/python/unittest/test_codegen_device.py b/tests/python/unittest/test_codegen_device.py
index d6a44fbc9b12f..88abca8d28207 100644
--- a/tests/python/unittest/test_codegen_device.py
+++ b/tests/python/unittest/test_codegen_device.py
@@ -70,14 +70,14 @@ def test_add_pipeline():
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
     Db = tvm.tir.decl_buffer(D.shape, D.dtype, name='D')
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    fapi = tvm.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True)
-    fsplits = [x for x in tvm.ir_pass.SplitHostDevice(fapi)]
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True)
+    fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(fapi)]
     # lower the floordiv(use stackvm rules so it works for all targets)
-    fsplits = [tvm.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits]
-    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+    fsplits = [tvm.tir.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits]
+    fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0])
 
     def check_target(device, host="stackvm"):
         ctx = tvm.context(device, 0)
diff --git a/tests/python/unittest/test_codegen_extern.py b/tests/python/unittest/test_codegen_extern.py
index 3b6b5edb88518..4104af8644397 100644
--- a/tests/python/unittest/test_codegen_extern.py
+++ b/tests/python/unittest/test_codegen_extern.py
@@ -26,14 +26,14 @@ def test_add_pipeline():
 
     def extern_generator(ins, outs):
         """Manually write the IR for the extern function, add pipeline"""
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         with ib.for_range(0, (n+1) // 2) as i:
             ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.tir.const(1, "float32x2")))
         return ib.get()
 
     def extern_generator_gpu(ins, outs):
         """Manually write the IR for the extern function, add pipeline"""
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         bx = te.thread_axis("blockIdx.x")
         tx = te.thread_axis("threadIdx.x")
         ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads)
diff --git a/tests/python/unittest/test_codegen_llvm.py b/tests/python/unittest/test_codegen_llvm.py
index 0eae4b9fc3e46..45554c5475a3a 100644
--- a/tests/python/unittest/test_codegen_llvm.py
+++ b/tests/python/unittest/test_codegen_llvm.py
@@ -23,7 +23,7 @@
 import math
 
 def test_llvm_intrin():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = tvm.runtime.convert(4)
     A = ib.pointer("float32", name="A")
     args = [
@@ -34,7 +34,7 @@ def test_llvm_intrin():
         tvm.tir.Call(
             "int32", "prefetch", args, tvm.tir.Call.Intrinsic, None, 0)))
     body = ib.get()
-    func = tvm.ir_pass.MakeAPI(body, "prefetch", [A], 0, True)
+    func = tvm.tir.ir_pass.MakeAPI(body, "prefetch", [A], 0, True)
     fcode = tvm.build(func, None, "llvm")
 
 
@@ -79,13 +79,13 @@ def check_llvm(use_file):
 
 
 def test_llvm_lookup_intrin():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     m = te.size_var("m")
     A = ib.pointer("uint8x8", name="A")
     x = tvm.tir.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.tir.const(1, 'uint32'), A)
     ib.emit(x)
     body = ib.get()
-    func = tvm.ir_pass.MakeAPI(body, "ctpop", [A], 1, True)
+    func = tvm.tir.ir_pass.MakeAPI(body, "ctpop", [A], 1, True)
     fcode = tvm.build(func, None, "llvm")
 
 
@@ -148,7 +148,7 @@ def check_llvm():
         tvm.testing.assert_allclose(
             c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-    with tvm.build_config(offset_factor=4):
+    with tvm.target.build_config(offset_factor=4):
         check_llvm()
 
 
@@ -263,7 +263,7 @@ def check_llvm(nn, base, stride):
             c.asnumpy(), a.asnumpy()[base:] + 1)
     check_llvm(64, 0, 2)
     check_llvm(4, 0, 1)
-    with tvm.build_config(restricted_func=False):
+    with tvm.target.build_config(restricted_func=False):
         check_llvm(4, 0, 3)
 
 
@@ -391,7 +391,7 @@ def test_rank_zero_bound_checkers():
     def check_llvm(n):
         if not tvm.runtime.enabled("llvm"):
             return
-        with tvm.build_config(instrument_bound_checkers=True):
+        with tvm.target.build_config(instrument_bound_checkers=True):
             A = te.placeholder((n, ), name='A')
             scale = te.placeholder((), name='scale')
             k = te.reduce_axis((0, n), name="k")
@@ -653,9 +653,9 @@ def vectorizer(op):
             value = new_a + new_b
             return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones)
 
-        return tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
+        return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
 
-    with tvm.build_config(add_lower_pass=[(1, my_vectorize)]):
+    with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]):
         ir = tvm.lower(sch, [a, b, c], simple_mode=True)
         module = tvm.build(sch, [a, b, c])
         a_ = tvm.nd.array(np.arange(1, 9, dtype='int32'))
diff --git a/tests/python/unittest/test_codegen_static_init.py b/tests/python/unittest/test_codegen_static_init.py
index 5eb79e5391891..3b5f17a4243a5 100644
--- a/tests/python/unittest/test_codegen_static_init.py
+++ b/tests/python/unittest/test_codegen_static_init.py
@@ -24,7 +24,7 @@ def test_static_callback():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
     i = te.size_var('i')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.buffer_ptr(Ab)
     cp = te.thread_axis((0, 1), "cop")
     finit = tvm.tir.StringImm("TVMBackendRunOnce")
@@ -32,8 +32,8 @@ def test_static_callback():
     with ib.for_range(0, n, "i", for_type="parallel") as i:
         A[i] = A[i] + 1
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     f = tvm.target.codegen.build_module(fapi, "llvm")
     a = tvm.nd.array(np.zeros(10, dtype=dtype))
     f(a)
@@ -45,7 +45,7 @@ def test_static_init():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
     i = te.size_var('i')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     handle = tvm.tir.call_intrin("handle", "tvm_static_handle")
     ib.emit(
         tvm.tir.call_packed("test_static_callback", handle, Ab))
@@ -56,8 +56,8 @@ def test_cb(sh, A):
         return sh
 
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     f = tvm.target.codegen.build_module(fapi, "llvm")
     a = tvm.nd.array(np.zeros(10, dtype=dtype))
     f(a)
diff --git a/tests/python/unittest/test_codegen_vm_basic.py b/tests/python/unittest/test_codegen_vm_basic.py
index 896b95d314815..e2ff4875e6fd7 100644
--- a/tests/python/unittest/test_codegen_vm_basic.py
+++ b/tests/python/unittest/test_codegen_vm_basic.py
@@ -36,9 +36,9 @@ def tvm_call_back_get_shape(shape0):
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), "float32")
     stmt = tvm.tir.Evaluate(tvm.tir.call_packed("tvm_call_back_get_shape", Ab.shape[0]))
-    fapi = tvm.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
-    fapi = tvm.ir_pass.LowerIntrin(fapi, "stackvm")
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.LowerIntrin(fapi, "stackvm")
     run_jit(fapi, lambda f: f(a))
 
 
@@ -52,15 +52,15 @@ def test_stack_vm_loop():
     Ab = tvm.tir.decl_buffer((n, ), dtype)
     i = te.size_var('i')
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.buffer_ptr(Ab)
     with ib.for_range(0, n - 1, "i") as i:
         A[i + 1] = A[i] + 1
         ib.emit(tvm.tir.call_packed("tvm_stack_vm_print", i))
 
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     a = tvm.nd.array(np.zeros(10, dtype=dtype))
     def check(f):
         f(a)
@@ -73,7 +73,7 @@ def test_stack_vm_cond():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.buffer_ptr(Ab)
     with ib.for_range(0, n - 1, "i") as i:
         with ib.if_scope(tvm.tir.EQ(i,  4)):
@@ -82,8 +82,8 @@ def test_stack_vm_cond():
             A[i + 1] = A[i] + 2
 
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     def check(f):
         a = tvm.nd.array(np.zeros(10, dtype=dtype))
         f(a)
@@ -97,13 +97,13 @@ def test_vm_parallel():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
     i = te.size_var('i')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.buffer_ptr(Ab)
     with ib.for_range(0, n, "i", for_type="parallel") as i:
         A[i] = A[i] + 1
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     def check(f):
         a = tvm.nd.array(np.zeros(10, dtype=dtype))
         f(a)
diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py
index bc8483f37a834..3e90442d6ee8f 100644
--- a/tests/python/unittest/test_hybrid_script.py
+++ b/tests/python/unittest/test_hybrid_script.py
@@ -24,8 +24,8 @@
 @pytest.mark.skip
 def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None):
     def tvm_val_2_py_val(val):
-        val = tvm.ir_pass.Substitute(val, var_dict)
-        val = tvm.ir_pass.Simplify(val)
+        val = tvm.tir.ir_pass.Substitute(val, var_dict)
+        val = tvm.tir.ir_pass.Simplify(val)
         assert isinstance(val, (tvm.tir.IntImm,))
         return val.value
 
@@ -182,7 +182,7 @@ def fanout(n, a):
     assert isinstance(ir, tvm.tir.For)
     assert ir.loop_var.name == 'i'
     assert ir.min.value == 0
-    assert tvm.ir_pass.Equal(ir.extent, n - 3)
+    assert tvm.tir.ir_pass.Equal(ir.extent, n - 3)
     #Check loopbody
     ibody = ir.body
     assert isinstance(ibody, tvm.tir.AttrStmt)
@@ -215,7 +215,7 @@ def fanout(n, a):
     assert value.a.args[0].value == 0
     assert value.b.name == 'a'
     assert len(value.b.args) == 1
-    assert tvm.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var)
+    assert tvm.tir.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var)
     divide= rbody[2]
     assert isinstance(divide, tvm.tir.Provide)
     assert len(divide.args) == 1
diff --git a/tests/python/unittest/test_ir_builder.py b/tests/python/unittest/test_ir_builder.py
index 689f6fa32839d..9106be843b489 100644
--- a/tests/python/unittest/test_ir_builder.py
+++ b/tests/python/unittest/test_ir_builder.py
@@ -19,7 +19,7 @@
 import numpy as np
 
 def test_for():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
     A = ib.allocate("float32", n, name="A", scope="global")
     with ib.for_range(0, n, name="i") as i:
@@ -39,7 +39,7 @@ def test_for():
     assert isinstance(body[1], tvm.tir.For)
 
 def test_if():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
     A = ib.pointer("float32", name="A")
     tmod = tvm.tir.truncmod
@@ -60,7 +60,7 @@ def test_if():
 
 def test_prefetch():
     A = te.placeholder((10, 20), name="A")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
 
     with ib.for_range(0, n, name="i") as i:
@@ -80,7 +80,7 @@ def test_cpu():
     def test_device_ir(A, B, C):
         n = A.shape[0]
         max_threads = 8
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         Aptr = ib.buffer_ptr(A)
         Bptr = ib.buffer_ptr(B)
         Cptr = ib.buffer_ptr(C)
@@ -115,7 +115,7 @@ def test_gpu():
     def test_device_ir(A, B, C):
         n = A.shape[0]
         max_threads = 32
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         bx = te.thread_axis("blockIdx.x")
         tx = te.thread_axis("threadIdx.x")
         ib.scope_attr(bx, "thread_extent", idxd(n+max_threads-1, max_threads))
diff --git a/tests/python/unittest/test_lang_buffer.py b/tests/python/unittest/test_lang_buffer.py
index 6fff8c828adf7..9203fb1c7b348 100644
--- a/tests/python/unittest/test_lang_buffer.py
+++ b/tests/python/unittest/test_lang_buffer.py
@@ -36,7 +36,7 @@ def test_buffer_access_ptr():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1])
     aptr = Ab.access_ptr("rw")
-    assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m)
+    assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m)
     assert aptr.args[0].dtype == Ab.dtype
     assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
     aptr = Ab.access_ptr("w")
@@ -48,17 +48,17 @@ def test_buffer_access_ptr_offset():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((m, n), "float32")
     aptr = Ab.access_ptr("rw", offset=100)
-    offset = tvm.ir_pass.Simplify(aptr.args[2])
-    assert tvm.ir_pass.Equal(offset, 100)
+    offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+    assert tvm.tir.ir_pass.Equal(offset, 100)
     assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
     v = te.size_var('int32')
     aptr = Ab.access_ptr("rw", offset=100 + 100 + v)
-    offset = tvm.ir_pass.Simplify(aptr.args[2])
-    assert tvm.ir_pass.Equal(offset, 200 + v)
+    offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+    assert tvm.tir.ir_pass.Equal(offset, 200 + v)
     assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
     aptr = Ab.access_ptr("rw", offset=tvm.tir.call_extern('int32', "test_call", 100 + 100 + v))
-    offset = tvm.ir_pass.Simplify(aptr.args[2])
-    assert tvm.ir_pass.Equal(offset, tvm.tir.call_extern('int32', "test_call", 200 + v))
+    offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+    assert tvm.tir.ir_pass.Equal(offset, tvm.tir.call_extern('int32', "test_call", 200 + v))
     assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
 
 
@@ -67,12 +67,12 @@ def test_buffer_access_ptr_extent():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((m, n), "float32")
     aptr = Ab.access_ptr("rw")
-    assert tvm.ir_pass.Equal(aptr.args[3], m * n)
+    assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n)
     aptr = Ab.access_ptr("rw", offset=100)
-    assert tvm.ir_pass.Equal(aptr.args[3], m * n - 100)
+    assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n - 100)
     Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1])
     aptr = Ab.access_ptr("rw", offset=100)
-    assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100)
+    assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100)
 
 
 def test_buffer_vload():
@@ -80,8 +80,8 @@ def test_buffer_vload():
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((m, n), "float32", elem_offset=100)
     load = Ab.vload([2, 3])
-    offset = tvm.ir_pass.Simplify(load.index)
-    assert tvm.ir_pass.Equal(offset, n * 2 + 103)
+    offset = tvm.tir.ir_pass.Simplify(load.index)
+    assert tvm.tir.ir_pass.Equal(offset, n * 2 + 103)
 
 
 def test_buffer_index_merge_mult_mod():
@@ -93,7 +93,7 @@ def test_buffer_index_merge_mult_mod():
     A = tvm.tir.decl_buffer((m, n), "float32")
     A_stride = tvm.tir.decl_buffer((m, n), "float32", strides=(s, 1))
     def assert_simplified_equal(index_simplified, index_direct):
-        assert tvm.ir_pass.Equal(index_simplified, index_direct),\
+        assert tvm.tir.ir_pass.Equal(index_simplified, index_direct),\
         "index_simplified=%s, index_direct=%s" %(index_simplified, index_direct)
     idxd = tvm.tir.indexdiv
     idxm = tvm.tir.indexmod
diff --git a/tests/python/unittest/test_lang_operator.py b/tests/python/unittest/test_lang_operator.py
index 7e2ec78e8b4c7..23c594022faf7 100644
--- a/tests/python/unittest/test_lang_operator.py
+++ b/tests/python/unittest/test_lang_operator.py
@@ -71,7 +71,7 @@ def test_const_fold3():
     for tvm_func, py_func in [(tvm.tir.all, lambda a, b: a and b), (tvm.tir.any, lambda a, b: a or b)]:
         for v1 in [0, 1]:
             for v2 in [0, 1]:
-                assert tvm.ir_pass.Equal(tvm_func(tvm.tir.const(v1, 'uint1'), tvm.tir.const(v2, 'uint1')),
+                assert tvm.tir.ir_pass.Equal(tvm_func(tvm.tir.const(v1, 'uint1'), tvm.tir.const(v2, 'uint1')),
                                          tvm.tir.const(py_func(v1, v2), 'uint1'))
 
     x = te.var("x", 'uint1')
@@ -170,13 +170,13 @@ def test_if_then_else():
             out = tvm.tir.if_then_else(cond, lhs, rhs)
             out2 = tvm.tir.if_then_else(not cond, rhs, lhs)
             out3 = tvm.tir.if_then_else(not cond, lhs, rhs)
-            assert tvm.ir_pass.Equal(out, out2) == 1
+            assert tvm.tir.ir_pass.Equal(out, out2) == 1
             if cond:
-                assert tvm.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1
-                assert tvm.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1
+                assert tvm.tir.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1
+                assert tvm.tir.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1
             else:
-                assert tvm.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1
-                assert tvm.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1
+                assert tvm.tir.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1
+                assert tvm.tir.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1
         elif cond.dtype == 'bool':
             out = tvm.tir.if_then_else(cond, lhs, rhs)
             assert out.dtype == out_dtype
diff --git a/tests/python/unittest/test_lang_schedule.py b/tests/python/unittest/test_lang_schedule.py
index 4888d5bb918f4..dae43bb2bbf2d 100644
--- a/tests/python/unittest/test_lang_schedule.py
+++ b/tests/python/unittest/test_lang_schedule.py
@@ -199,7 +199,7 @@ def intrin_func(ins, outs):
         assert(isinstance(ins[0], tvm.te.schedule.Buffer))
         assert(ins[0].shape[0].value == n)
         return tvm.tir.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0])
-    intrin = tvm.decl_tensor_intrin(z.op, intrin_func)
+    intrin = te.decl_tensor_intrin(z.op, intrin_func)
     assert intrin.op == z.op
     assert intrin.reduce_init is None
     assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
@@ -228,8 +228,8 @@ def intrin_func(ins, outs, sp):
         assert(sp[1] == w)
         return tvm.tir.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1])
 
-    with tvm.build_config(offset_factor=1):
-      intrin = tvm.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w])
+    with tvm.target.build_config(offset_factor=1):
+      intrin = te.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w])
     assert intrin.op == z.op
     assert intrin.reduce_init is None
     assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
diff --git a/tests/python/unittest/test_lang_tag.py b/tests/python/unittest/test_lang_tag.py
index c2bdd4b91606a..6cfc0b12464e5 100644
--- a/tests/python/unittest/test_lang_tag.py
+++ b/tests/python/unittest/test_lang_tag.py
@@ -19,7 +19,7 @@
 from tvm import te
 from tvm import te
 
-@tvm.tag_scope(tag="conv")
+@tvm.te.tag_scope(tag="conv")
 def compute_conv(data, weight):
     N, IC, H, W = data.shape
     OC, IC, KH, KW = weight.shape
@@ -41,7 +41,7 @@ def test_with():
 
     A = te.placeholder((n, l), name='A')
     B = te.placeholder((m, l), name='B')
-    with tvm.tag_scope(tag="gemm"):
+    with tvm.te.tag_scope(tag="gemm"):
         k = te.reduce_axis((0, l), name='k')
         C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k),
                         attrs={"hello" : 1, "arr": [10, 12]})
diff --git a/tests/python/unittest/test_lang_tensor.py b/tests/python/unittest/test_lang_tensor.py
index 92a42fcfdee87..762b3fe751802 100644
--- a/tests/python/unittest/test_lang_tensor.py
+++ b/tests/python/unittest/test_lang_tensor.py
@@ -113,12 +113,12 @@ def intrin_vadd(n):
         z = te.compute(x.shape, lambda i: x[i] + y[i])
 
         def intrin_func(ins, outs):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
             return ib.get()
 
-        with tvm.build_config(offset_factor=n):
-            return tvm.decl_tensor_intrin(z.op, intrin_func)
+        with tvm.target.build_config(offset_factor=n):
+            return te.decl_tensor_intrin(z.op, intrin_func)
 
     vadd = intrin_vadd(factor)
 
@@ -159,8 +159,8 @@ def intrin_func(ins, outs):
                 "gemv_add", x_ptr, y_ptr, z_ptr, m, n, l)
             return body, reset, update
 
-        with tvm.build_config(offset_factor=n):
-            return tvm.decl_tensor_intrin(z.op, intrin_func)
+        with tvm.target.build_config(offset_factor=n):
+            return te.decl_tensor_intrin(z.op, intrin_func)
 
     vgemm = intrin_gemm(factor1, factor2, factor)
 
@@ -264,7 +264,7 @@ def get_B1_realize(x):
            x.func == B1.op and x.value_index == 1:
             ret.append(x)
     ret = []
-    tvm.ir_pass.PostOrderVisit(stmt, get_B1_realize)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, get_B1_realize)
 
     assert stmt.node == C.op and len(ret) == 1
 
@@ -290,8 +290,8 @@ def intrin_func(ins, outs):
             dout = outs[0]
             return tvm.tir.call_packed("op", dinp, dout)
 
-        with tvm.build_config(offset_factor=1):
-            return tvm.decl_tensor_intrin(P.op, intrin_func)
+        with tvm.target.build_config(offset_factor=1):
+            return te.decl_tensor_intrin(P.op, intrin_func)
 
     A = te.placeholder((1, 64, 16, 16), name='A')
     P = pool(data=A, kernel=(3, 3), stride=(1, 1), padding=(0, 0, 0, 0),
diff --git a/tests/python/unittest/test_pass_attrs_hash_equal.py b/tests/python/unittest/test_pass_attrs_hash_equal.py
index 19cd72cc50a73..b3587cd7cb3db 100644
--- a/tests/python/unittest/test_pass_attrs_hash_equal.py
+++ b/tests/python/unittest/test_pass_attrs_hash_equal.py
@@ -21,28 +21,28 @@ def test_attrs_equal():
     x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
     y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
     z = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3,4,1))
-    assert tvm.ir_pass.AttrsEqual(x, y)
-    assert not tvm.ir_pass.AttrsEqual(x, z)
+    assert tvm.tir.ir_pass.AttrsEqual(x, y)
+    assert not tvm.tir.ir_pass.AttrsEqual(x, z)
 
     dattr = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0))
-    assert not tvm.ir_pass.AttrsEqual(dattr, x)
+    assert not tvm.tir.ir_pass.AttrsEqual(dattr, x)
     dattr2 = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0))
-    assert tvm.ir_pass.AttrsEqual(dattr, dattr2)
+    assert tvm.tir.ir_pass.AttrsEqual(dattr, dattr2)
 
-    assert tvm.ir_pass.AttrsEqual({"x": x}, {"x": y})
+    assert tvm.tir.ir_pass.AttrsEqual({"x": x}, {"x": y})
     # array related checks
-    assert tvm.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]})
-    assert not tvm.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]})
+    assert tvm.tir.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]})
+    assert not tvm.tir.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]})
 
     n = te.var("n")
-    assert tvm.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1})
+    assert tvm.tir.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1})
 
 
 
 
 
 def test_attrs_hash():
-    fhash = tvm.ir_pass.AttrsHash
+    fhash = tvm.tir.ir_pass.AttrsHash
     x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
     y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
     assert fhash({"x": x}) == fhash({"x": y})
diff --git a/tests/python/unittest/test_pass_basic.py b/tests/python/unittest/test_pass_basic.py
index 57d37f73825a2..f7eaa217683bd 100644
--- a/tests/python/unittest/test_pass_basic.py
+++ b/tests/python/unittest/test_pass_basic.py
@@ -21,19 +21,19 @@ def test_simplify():
   tdiv = tvm.tir.truncdiv
   tmod = tvm.tir.truncmod
   x = te.var('x')
-  e1 = tvm.ir_pass.Simplify(x + 2 + 1)
-  assert(tvm.ir_pass.Equal(e1, x + 3))
-  e2 = tvm.ir_pass.Simplify(x * 3 + 5 * x)
-  assert(tvm.ir_pass.Equal(e2, x * 8))
-  e3 = tvm.ir_pass.Simplify(x - tdiv(x, 3) * 3)
-  assert(tvm.ir_pass.Equal(e3, tmod(x, 3)))
+  e1 = tvm.tir.ir_pass.Simplify(x + 2 + 1)
+  assert(tvm.tir.ir_pass.Equal(e1, x + 3))
+  e2 = tvm.tir.ir_pass.Simplify(x * 3 + 5 * x)
+  assert(tvm.tir.ir_pass.Equal(e2, x * 8))
+  e3 = tvm.tir.ir_pass.Simplify(x - tdiv(x, 3) * 3)
+  assert(tvm.tir.ir_pass.Equal(e3, tmod(x, 3)))
 
 
 def test_verify_ssa():
     x = te.var('x')
     y = te.var()
     z = tvm.tir.Evaluate(x + y)
-    assert(tvm.ir_pass.VerifySSA(z))
+    assert(tvm.tir.ir_pass.VerifySSA(z))
 
 
 def test_convert_ssa():
@@ -42,15 +42,15 @@ def test_convert_ssa():
     let1 = tvm.tir.Let(x, 1, x + 1)
     let2 = tvm.tir.Let(x, 1, x + y)
     z = tvm.tir.Evaluate(let1 + let2)
-    assert(not tvm.ir_pass.VerifySSA(z))
-    z_ssa = tvm.ir_pass.ConvertSSA(z)
-    assert(tvm.ir_pass.VerifySSA(z_ssa))
+    assert(not tvm.tir.ir_pass.VerifySSA(z))
+    z_ssa = tvm.tir.ir_pass.ConvertSSA(z)
+    assert(tvm.tir.ir_pass.VerifySSA(z_ssa))
 
 
 def test_expr_use_var():
     x = te.var('x')
-    assert(tvm.ir_pass.ExprUseVar(x+1, x))
-    assert(not tvm.ir_pass.ExprUseVar(1+10, x))
+    assert(tvm.tir.ir_pass.ExprUseVar(x+1, x))
+    assert(not tvm.tir.ir_pass.ExprUseVar(1+10, x))
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_bound_checkers.py b/tests/python/unittest/test_pass_bound_checkers.py
index 7abccc137c432..b3390972ab008 100644
--- a/tests/python/unittest/test_pass_bound_checkers.py
+++ b/tests/python/unittest/test_pass_bound_checkers.py
@@ -20,7 +20,7 @@
 import numpy as np
 def collect_visit(stmt, f):
     ret = []
-    tvm.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x)))
+    tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x)))
     return ret
 
 def lower(sch, args):
@@ -37,12 +37,12 @@ def lower(sch, args):
     sch = sch.normalize()
     bounds = tvm.te.schedule.InferBound(sch)
     stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.RemoveNoOp(stmt)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64, True)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.RemoveNoOp(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64, True)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     return stmt
 
 @pytest.mark.xfail
@@ -201,7 +201,7 @@ def collect_branch_stmt (x):
     # before instrumentation
     assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3)
     assert_bound_instrumentation(stmt, check_branch_stmt, 0)
-    stmt = tvm.ir_pass.InstrumentBoundCheckers(stmt)
+    stmt = tvm.tir.ir_pass.InstrumentBoundCheckers(stmt)
     # after instrumentation
     assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3)
     assert_bound_instrumentation(stmt, check_branch_stmt, 2)
@@ -213,7 +213,7 @@ def collect_branch_stmt (x):
     print (branch_collector[1].condition)
 
 def test_in_bounds_const_loop_partition_llvm():
-    with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True):
+    with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True):
         n = 21
         A = te.placeholder((n, ), name='A')
         B = te.placeholder((n, ), name='B')
@@ -233,7 +233,7 @@ def test_in_bounds_const_loop_partition_llvm():
 
 @pytest.mark.xfail
 def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b):
-    with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True):
+    with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True):
         n = 21
         A = te.placeholder((n, ), name='A')
         B = te.placeholder((n, ), name='B')
@@ -474,7 +474,7 @@ def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm():
     tvm.testing.assert_allclose(d.asnumpy(), d_np)
 
 if __name__ == "__main__":
-    with tvm.build_config(instrument_bound_checkers=True):
+    with tvm.target.build_config(instrument_bound_checkers=True):
         # zero scale
         test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm()
         # in bound
diff --git a/tests/python/unittest/test_pass_combine_context_call.py b/tests/python/unittest/test_pass_combine_context_call.py
index 189dab70be0aa..e51d4d874ec9b 100644
--- a/tests/python/unittest/test_pass_combine_context_call.py
+++ b/tests/python/unittest/test_pass_combine_context_call.py
@@ -24,7 +24,7 @@ def device_context(dev_id):
         return tvm.tir.Call(
             "handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0)
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     A = ib.allocate("float32", n, name="A", scope="global")
     with ib.for_range(0, n, name="i") as i:
@@ -36,8 +36,8 @@ def device_context(dev_id):
             ib.emit(tvm.tir.call_extern
                     ("int32", "fadd", device_context(0), A))
     body = ib.get()
-    f = tvm.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True)
-    f = tvm.ir_pass.CombineContextCall(f)
+    f = tvm.tir.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True)
+    f = tvm.tir.ir_pass.CombineContextCall(f)
     assert f.body.value.dtype == "handle"
     assert f.body.body.value.dtype == "handle"
 
diff --git a/tests/python/unittest/test_pass_decorate_device_scope.py b/tests/python/unittest/test_pass_decorate_device_scope.py
index 1cda4d9b0c3ff..327cfd9ed5484 100644
--- a/tests/python/unittest/test_pass_decorate_device_scope.py
+++ b/tests/python/unittest/test_pass_decorate_device_scope.py
@@ -32,8 +32,8 @@ def test_decorate_device():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt1 = tvm.ir_pass.Simplify(stmt)
-    stmt2 = tvm.ir_pass.DecorateDeviceScope(stmt1)
+    stmt1 = tvm.tir.ir_pass.Simplify(stmt)
+    stmt2 = tvm.tir.ir_pass.DecorateDeviceScope(stmt1)
     assert isinstance(stmt2, tvm.tir.AttrStmt)
     assert stmt2.attr_key == "device_scope"
     assert stmt1 == stmt2.body
diff --git a/tests/python/unittest/test_pass_equal.py b/tests/python/unittest/test_pass_equal.py
index cc62bb1863fc9..873cb7be447ce 100644
--- a/tests/python/unittest/test_pass_equal.py
+++ b/tests/python/unittest/test_pass_equal.py
@@ -27,9 +27,9 @@ def func1():
     def func2():
         return te.exp(tvm.tir.truncdiv((x + y + 1) * y, 4))
 
-    assert tvm.ir_pass.Equal(func1(), func1())
-    assert tvm.ir_pass.Equal(func2(), func2())
-    assert not tvm.ir_pass.Equal(func2(), func1())
+    assert tvm.tir.ir_pass.Equal(func1(), func1())
+    assert tvm.tir.ir_pass.Equal(func2(), func2())
+    assert not tvm.tir.ir_pass.Equal(func2(), func1())
 
 
 def test_equal_compute():
@@ -48,7 +48,7 @@ def func1():
     Ab = tvm.tir.decl_buffer((n,), name='A')
     n = te.var("n")
     def func2():
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         A = ib.buffer_ptr(Ab)
         with ib.for_range(0, n, name="i") as i:
             A[i] = A[i] + 1
@@ -57,8 +57,8 @@ def func2():
                 A[j] = A[j] + 2
         return ib.get()
 
-    assert tvm.ir_pass.Equal(func1(), func1())
-    assert tvm.ir_pass.Equal(func2(), func2())
+    assert tvm.tir.ir_pass.Equal(func1(), func1())
+    assert tvm.tir.ir_pass.Equal(func2(), func2())
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_hoist_if.py b/tests/python/unittest/test_pass_hoist_if.py
index d93ebbc91a7a1..f6bdbd6130f44 100644
--- a/tests/python/unittest/test_pass_hoist_if.py
+++ b/tests/python/unittest/test_pass_hoist_if.py
@@ -32,7 +32,7 @@ def _visit(op):
         key = op
         if isinstance(op, tvm.tir.IfThenElse):
             global var_list
-            tvm.ir_pass.PostOrderVisit(op.condition, _extract_vars)
+            tvm.tir.ir_pass.PostOrderVisit(op.condition, _extract_vars)
             val = [(op.then_case, op.else_case), ("IfThenElse", tuple(var_list))]
             var_list.clear()
         elif isinstance(op, tvm.tir.For):
@@ -43,7 +43,7 @@ def _visit(op):
             return
         node_dict[key] = val
 
-    tvm.ir_pass.PostOrderVisit(stmt, _visit)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, _visit)
     for key, val in node_dict.items():
         struct[val[1]] = tuple(node_dict[child][1] if child in node_dict
                                else None for child in val[0])
@@ -53,7 +53,7 @@ def _visit(op):
     var_list.clear()
 
 def test_basic():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     l = te.var('l')
     m = te.var('m')
     n = te.var('n')
@@ -67,14 +67,14 @@ def test_basic():
                     ib.emit(tvm.tir.Evaluate(n))
 
     stmt = ib.get()
-    new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+    new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
     expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),),
                        ('IfThenElse', ('i',)): (('For', 'j'), ('For', 'j')),
                        ('For', 'i'): (('IfThenElse', ('i',)),)}
     verify_structure(new_stmt, expected_struct)
 
 def test_no_else():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     l = te.var('l')
     m = te.var('m')
     n = te.var('n')
@@ -86,14 +86,14 @@ def test_no_else():
                     ib.emit(tvm.tir.Evaluate(m))
 
     stmt = ib.get()
-    new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+    new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
     expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),),
                        ('IfThenElse', ('i',)): (('For', 'j'), None),
                        ('For', 'i'): (('IfThenElse', ('i',)),)}
     verify_structure(new_stmt, expected_struct)
 
 def test_attr_stmt():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     dshape = (32, 64)
     data = ib.pointer("float32", name="data")
     l = te.var('l')
@@ -113,7 +113,7 @@ def test_attr_stmt():
                     data[bx * j + tx * j * k] = data[bx * j + tx * j * k]  + 1.0
 
     stmt = ib.get()
-    new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+    new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
     expected_struct = {('For', 'k'): (None,), ('IfThenElse', ('i', 'j')): (('For', 'k'), ('For', 'k')),
                        ('For', 'j'): (('IfThenElse', ('i', 'j')),), ('For', 'i'): (('For', 'j'),),
                        ('AttrStmt', 'thread_extent', 64): (('For', 'i'),),
@@ -121,7 +121,7 @@ def test_attr_stmt():
     verify_structure(new_stmt, expected_struct)
 
 def test_nested_for():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     data = ib.pointer("float32", name="data")
 
 
@@ -137,14 +137,14 @@ def test_nested_for():
                             data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5
 
     stmt = ib.get()
-    new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+    new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
     expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('For', 'l'): (('IfThenElse', ('i', 'j')),),
                        ('For', 'k'): (('For', 'l'),), ('For', 'j'): (None,), ('IfThenElse', ('i',)): (('For', 'j'), None),
                        ('For', 'i'): (('IfThenElse', ('i',)),)}
     verify_structure(new_stmt, expected_struct)
 
 def test_if_block():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     data = ib.pointer("float32", name="data")
     n = te.var("n")
 
@@ -170,7 +170,7 @@ def test_if_block():
                         data[i * 3 + j + k] = data[i * 3 + j + k] + 0.6
 
     stmt = ib.get()
-    new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+    new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
     expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('IfThenElse', ('j',)): (None, None),
                        ('For', 'l'): (None,), ('For', 'k'): (None,), ('For', 'j'): (('For', 'j'),),
                        ('IfThenElse', ('i',)): (('For', 'j'), None), ('For', 'i'): (('IfThenElse', ('i',)),),
diff --git a/tests/python/unittest/test_pass_inject_copy_intrin.py b/tests/python/unittest/test_pass_inject_copy_intrin.py
index 5920ed780b9d2..8c34e344d73ea 100644
--- a/tests/python/unittest/test_pass_inject_copy_intrin.py
+++ b/tests/python/unittest/test_pass_inject_copy_intrin.py
@@ -28,14 +28,14 @@ def test_copy2d():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
     def cb(src, dst, pad_before, pad_after, pad_value):
         assert dst.strides[0] == l
         assert dst.strides[1].value == 1
         assert src.strides[0] == l
         assert tuple(src.shape) == (m, l)
         return tvm.tir.Evaluate(0)
-    stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+    stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
 
 def test_copy_pad():
     m = te.var('m')
@@ -50,16 +50,16 @@ def test_copy_pad():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
     def cb(src, dst, pad_before, pad_after, pad_value):
-        assert tvm.ir_pass.Simplify(src.elem_offset).value == 0
+        assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0
         assert pad_before[0].value == 1
         assert pad_before[1].value == 0
         assert pad_after[0].value == 1
         assert pad_after[1].value == 0
         assert pad_value.value == 1.0
         return tvm.tir.Evaluate(0)
-    stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+    stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
 
 def test_single_point_test():
     A = te.placeholder((1,), name='A')
@@ -71,17 +71,17 @@ def test_single_point_test():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
     def cb(src, dst, pad_before, pad_after, pad_value):
-        assert tvm.ir_pass.Simplify(src.elem_offset).value == 0
-        assert tvm.ir_pass.Simplify(dst.elem_offset).value == 0
-        assert tvm.ir_pass.Simplify(src.strides[0]).value == 1
-        assert tvm.ir_pass.Simplify(dst.strides[0]).value == 1
+        assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0
+        assert tvm.tir.ir_pass.Simplify(dst.elem_offset).value == 0
+        assert tvm.tir.ir_pass.Simplify(src.strides[0]).value == 1
+        assert tvm.tir.ir_pass.Simplify(dst.strides[0]).value == 1
         return tvm.tir.Evaluate(0)
-    stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+    stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
 
 def assert_expr_equal(a, b):
-    assert tvm.ir_pass.Simplify(a - b).value == 0
+    assert tvm.tir.ir_pass.Simplify(a - b).value == 0
 
 def test_copy_pad_split():
     m = 4 * 3
@@ -98,9 +98,9 @@ def test_copy_pad_split():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
     def cb(src, dst, pad_before, pad_after, pad_value):
         assert(dst.elem_offset.value == 0)
         assert_expr_equal(src.elem_offset, tvm.te.max(xo * 4, 1) - 1)
@@ -111,7 +111,7 @@ def cb(src, dst, pad_before, pad_after, pad_value):
         assert_expr_equal(pad_after[0], rpad_after)
         assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after)
         return tvm.tir.Evaluate(0)
-    stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+    stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_inject_double_buffer.py b/tests/python/unittest/test_pass_inject_double_buffer.py
index 314902b3bdf35..0fe3f614796b6 100644
--- a/tests/python/unittest/test_pass_inject_double_buffer.py
+++ b/tests/python/unittest/test_pass_inject_double_buffer.py
@@ -22,7 +22,7 @@ def test_double_buffer():
     n = 100
     m = 4
     tx = te.thread_axis("threadIdx.x")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     C = ib.pointer("float32", name="C")
     ib.scope_attr(tx, "thread_extent", 1)
@@ -36,17 +36,17 @@ def test_double_buffer():
             C[j] = B[j] + 1
 
     stmt = ib.get()
-    stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert isinstance(stmt.body.body, tvm.tir.Allocate)
     assert stmt.body.body.extents[0].value == 2
-    f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
-    f = tvm.ir_pass.ThreadSync(f, "shared")
+    f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
+    f = tvm.tir.ir_pass.ThreadSync(f, "shared")
     count = [0]
     def count_sync(op):
         if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync":
             count[0] += 1
-    tvm.ir_pass.PostOrderVisit(f.body, count_sync)
+    tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync)
     assert count[0] == 4
 
 
diff --git a/tests/python/unittest/test_pass_inject_vthread.py b/tests/python/unittest/test_pass_inject_vthread.py
index 89285912f7729..8fbd8295d2388 100644
--- a/tests/python/unittest/test_pass_inject_vthread.py
+++ b/tests/python/unittest/test_pass_inject_vthread.py
@@ -25,7 +25,7 @@ def test_vthread():
     def get_vthread(name):
         tx = te.thread_axis(name)
         ty = te.thread_axis(name)
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         A = ib.pointer("float32", name="A")
         C = ib.pointer("float32", name="C")
         with ib.for_range(0, n) as i:
@@ -40,9 +40,9 @@ def get_vthread(name):
             C[i * nthread + tx] = B[i] + 1
         return ib.get()
 
-    stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread"))
+    stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread"))
     assert stmt.body.body.extents[0].value == 2
-    stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("cthread"))
+    stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("cthread"))
     assert len(stmt.body.body.extents) == 3
 
 
@@ -54,7 +54,7 @@ def test_vthread_extern():
     def get_vthread(name):
         tx = te.thread_axis(name)
         ty = te.thread_axis(name)
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         with ib.for_range(0, n) as i:
             ib.scope_attr(tx, "virtual_thread", nthread)
             ib.scope_attr(ty, "virtual_thread", nthread)
@@ -72,7 +72,7 @@ def get_vthread(name):
                                     cbuffer.access_ptr("rw")))
         return ib.get()
 
-    stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread"))
+    stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread"))
     assert stmt.body.body.extents[0].value == 2
     assert stmt.body.body.body.body.body.body.extents[0].value == 2
     assert len(stmt.body.body.body.body.body.body.extents) == 3
@@ -80,7 +80,7 @@ def get_vthread(name):
 def test_vthread_if_then_else():
     nthread = 2
     tx = te.thread_axis("vthread")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, 100) as i:
         ib.scope_attr(tx, "virtual_thread", nthread)
@@ -92,7 +92,7 @@ def test_vthread_if_then_else():
         with ib.if_scope(i == 0):
             B[i] = A[i * nthread + tx] + 2
     stmt = ib.get()
-    stmt = tvm.ir_pass.InjectVirtualThread(stmt)
+    stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt)
     assert stmt.body.body.body[0].else_case != None
     assert stmt.body.body.body[1].else_case == None
 
diff --git a/tests/python/unittest/test_pass_inline.py b/tests/python/unittest/test_pass_inline.py
index d332add3d92dc..ad0591d3a7c16 100644
--- a/tests/python/unittest/test_pass_inline.py
+++ b/tests/python/unittest/test_pass_inline.py
@@ -22,15 +22,15 @@ def test_inline():
     A = te.placeholder((m,), name='A')
     T = te.compute((m,), lambda i,: A[i] + 10, name='T')
     stmt = tvm.tir.Evaluate(T[10] + 11 * T[100])
-    stmt = tvm.ir_pass.Inline(
+    stmt = tvm.tir.ir_pass.Inline(
         stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
     print(stmt)
-    assert(tvm.ir_pass.VerifySSA(stmt))
+    assert(tvm.tir.ir_pass.VerifySSA(stmt))
 
     try:
         # pass in int array(wrong argument type)
         # must raise an error
-        stmt = tvm.ir_pass.Inline(
+        stmt = tvm.tir.ir_pass.Inline(
             T.op, [1,2,3], T.op.body, stmt)
         assert False
     except tvm.error.TVMError:
@@ -41,12 +41,12 @@ def test_inline2():
     A = te.placeholder((m,), name='A')
     T = te.compute((m,), lambda i,: A[i] + 10, name='T')
     stmt = tvm.tir.Evaluate(te.exp(T[10]) + 11 * T[100])
-    stmt = tvm.ir_pass.Inline(
+    stmt = tvm.tir.ir_pass.Inline(
         stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
     def check(op):
         if isinstance(op, tvm.tir.Call):
             assert op.func != T.op
-    tvm.ir_pass.PostOrderVisit(stmt, check)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, check)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_ir_transform.py b/tests/python/unittest/test_pass_ir_transform.py
index 564831f52fcf8..cb7417a7a54f2 100644
--- a/tests/python/unittest/test_pass_ir_transform.py
+++ b/tests/python/unittest/test_pass_ir_transform.py
@@ -18,7 +18,7 @@
 from tvm import te
 
 def test_ir_transform():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -37,7 +37,7 @@ def postorder(op):
         if op.name == "TestA":
             return tvm.tir.call_extern("int32", "TestB", op.args[0] + 1)
         return op
-    body = tvm.ir_pass.IRTransform(body, preorder, postorder, ["Call"])
+    body = tvm.tir.ir_pass.IRTransform(body, preorder, postorder, ["Call"])
     stmt_list = tvm.tir.stmt_list(body.body.body)
     assert stmt_list[0].value.args[0].name == "TestB"
     assert stmt_list[1].value.value == 0
diff --git a/tests/python/unittest/test_pass_lift_attr_scope.py b/tests/python/unittest/test_pass_lift_attr_scope.py
index e774dc427e1ad..0831565dc155b 100644
--- a/tests/python/unittest/test_pass_lift_attr_scope.py
+++ b/tests/python/unittest/test_pass_lift_attr_scope.py
@@ -18,7 +18,7 @@
 from tvm import te
 
 def test_coproc_lift():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     cp = te.thread_axis((0, 1), "cop")
     value = tvm.tir.StringImm("xxx")
@@ -35,11 +35,11 @@ def test_coproc_lift():
                 A[j] = A[j] + 3
                 A[j] = A[j] + 3
     body = ib.get()
-    body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
+    body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
     assert body.body.body.node == cp
 
     # only able to lift to the common pattern of the last two fors.
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.allocate("float32", n, name="A", scope="global")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -52,7 +52,7 @@ def test_coproc_lift():
             A[i] = A[i] + 2
 
     body = ib.get()
-    body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
+    body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
     assert body.body.body.body[1].node == cp
     assert len(body.body.body.body) == 2
 
diff --git a/tests/python/unittest/test_pass_loop_partition.py b/tests/python/unittest/test_pass_loop_partition.py
index 739f01fb68e46..7ec35e618aa3b 100644
--- a/tests/python/unittest/test_pass_loop_partition.py
+++ b/tests/python/unittest/test_pass_loop_partition.py
@@ -20,7 +20,7 @@
 
 def collect_visit(stmt, f):
     ret = []
-    tvm.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x)))
+    tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x)))
     return ret
 
 def find_top_produce(stmt):
@@ -28,7 +28,7 @@ def f(x, ret):
         if isinstance(x, tvm.tir.ProducerConsumer):
             ret.append(x)
     ret = []
-    tvm.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret))
+    tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret))
     return ret[-1]
 
 def lower(sch, args):
@@ -45,11 +45,11 @@ def lower(sch, args):
     sch = sch.normalize()
     bounds = tvm.te.schedule.InferBound(sch)
     stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     return stmt
 
 def test_basic():
@@ -63,8 +63,8 @@ def test_basic():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert('if' not in str(stmt.body.body.body[0]))
     assert('if' in str(stmt.body.body.body[1]))
 
@@ -79,12 +79,12 @@ def test_const_loop():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert('if' not in str(stmt.body.body.body[0]))
 
 def test_multi_loop():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     m = te.size_var('m')
     n = te.size_var('n')
     with ib.for_range(0, 4, "i") as i:
@@ -95,12 +95,12 @@ def test_multi_loop():
                 with ib.else_scope():
                     ib.emit(tvm.tir.Evaluate(n))
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt.body[0], lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_multi_if():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     m = te.size_var('m')
     n = te.size_var('n')
     with ib.for_range(0, 4, 'i') as i:
@@ -115,8 +115,8 @@ def test_multi_if():
                 with ib.else_scope():
                     ib.emit(tvm.tir.Evaluate(n))
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert('if' not in str(stmt.body[0]))
 
 def test_thread_axis():
@@ -134,8 +134,8 @@ def test_thread_axis():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert('if' not in str(stmt.body.body.body[0]))
 
 def test_vectorize():
@@ -161,7 +161,7 @@ def test_vectorize():
     assert(any(collect_visit(body.then_case, lambda x: isinstance(x, tvm.tir.Ramp))))
 
 def test_condition():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     m = te.size_var('m')
     n = te.size_var('n')
     with ib.for_range(0, tvm.tir.truncdiv(n+3,4), 'i') as i:
@@ -169,20 +169,20 @@ def test_condition():
         ib.emit(tvm.tir.Evaluate(
           tvm.tir.Select(ib.likely(i*4+j<n), m, n)))
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt[0], lambda x: isinstance(x, tvm.tir.Select))))
 
 def test_condition_EQ():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     m = te.size_var('m')
     n = te.size_var('n')
     with ib.for_range(0, 10, 'i') as i:
             ib.emit(tvm.tir.Evaluate(
                 tvm.tir.Select(ib.likely(tvm.tir.EQ(i, 5)), m, n)))
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt[0], lambda x: isinstance(x, tvm.tir.Select))))
 
 def test_thread_axis2():
@@ -205,15 +205,15 @@ def test_thread_axis2():
 def test_everything_during_deduction():
     m = te.size_var('m')
     n = te.size_var('n')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     with ib.for_range(0, n, 'i') as i:
         with ib.for_range(0, 32, 'j') as j:
             with ib.if_scope(ib.likely(tvm.tir.truncdiv(i,j) < m)):
                 # this guard will produce everything during deduction
                 ib.emit(tvm.tir.Evaluate(m))
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(isinstance(stmt.body.body, tvm.tir.IfThenElse))
 
 def test_single_likely():
@@ -228,8 +228,8 @@ def test_single_likely():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_multi_likely():
@@ -249,13 +249,13 @@ def test_multi_likely():
 
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_oneD_pool():
     m = te.size_var('m')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     #data = te.placeholder((16,), name = 'data')
     data = ib.pointer("float32", name="A")
     out = ib.pointer("float32", name="A")
@@ -276,12 +276,12 @@ def test_oneD_pool():
                     out[ow] = tvm.te.max(out[ow], data[ow + kw - 1])
 
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_cce_loop_1():
-  ib = tvm.ir_builder.create()
+  ib = tvm.tir.ir_builder.create()
   dtype = 'float16'
   n = 514
   m = 514
@@ -297,12 +297,12 @@ def test_cce_loop_1():
           with ib.if_scope(ib.likely(((i*160) + j) < 1600)):
                A[(i+1)*m+j+1] = B[(i)*m+j+1] + B[(i+1)*m+j+1] + B[(i+2)*m+j+1]
   stmt = ib.get()
-  stmt = tvm.ir_pass.LoopPartition(stmt, True)
-  stmt = tvm.ir_pass.Simplify(stmt)
+  stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+  stmt = tvm.tir.ir_pass.Simplify(stmt)
   assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_cce_loop_2():
-  ib = tvm.ir_builder.create()
+  ib = tvm.tir.ir_builder.create()
   len = 112
   tile = 32
   loop = (len + tile - 1) // tile
@@ -316,13 +316,13 @@ def test_cce_loop_2():
       ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail))
 
   stmt = ib.get()
-  stmt = tvm.ir_pass.LoopPartition(stmt, True)
-  stmt = tvm.ir_pass.Simplify(stmt)
+  stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+  stmt = tvm.tir.ir_pass.Simplify(stmt)
   assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 
 def test_cce_loop_3():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     loop1 = 4
     loop2 = 9998
     tile = 39991
@@ -334,8 +334,8 @@ def test_cce_loop_3():
                 ib.emit(tvm.tir.call_extern('float16',"cce_intrisic",head1))
 
     stmt = ib.get()
-    stmt = tvm.ir_pass.LoopPartition(stmt,True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt,True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 def test_conv_tiling():
@@ -363,8 +363,8 @@ def test_conv_tiling():
     oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
     bounds = tvm.te.schedule.InferBound(s)
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, True)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
 
 
@@ -380,7 +380,7 @@ def test_multilevel_splitting_with_indivisble_factors():
     s[B].unroll(yi)
 
     ## But this does the right thing.
-    with tvm.build_config(partition_const_loop=True):
+    with tvm.target.build_config(partition_const_loop=True):
         lowered_body = tvm.lower(s, [A, B]).body
         def visit_stmt(op):
             return(isinstance(op, tvm.tir.Max))
@@ -401,7 +401,7 @@ def test_double_splitting_with_indivisible_factors():
     s[C].compute_at(s[D], do)
 
     target = 'llvm'
-    with tvm.build_config(partition_const_loop=True):
+    with tvm.target.build_config(partition_const_loop=True):
         f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False)
         func = tvm.build(f, target=target)
 
@@ -438,13 +438,13 @@ def test_simple_rfactor():
     bounds = tvm.te.schedule.InferBound(s)
 
     stmt1 = tvm.te.schedule.ScheduleOps(s, bounds)
-    stmt1 = tvm.ir_pass.Simplify(stmt1)
+    stmt1 = tvm.tir.ir_pass.Simplify(stmt1)
 
-    stmt2 = tvm.ir_pass.LoopPartition(stmt1, True)
-    stmt2 = tvm.ir_pass.Simplify(stmt2)
+    stmt2 = tvm.tir.ir_pass.LoopPartition(stmt1, True)
+    stmt2 = tvm.tir.ir_pass.Simplify(stmt2)
 
     #make sure loop partition actually did something
-    assert not tvm.ir_pass.Equal(stmt1.body, stmt2.body)
+    assert not tvm.tir.ir_pass.Equal(stmt1.body, stmt2.body)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_lower_intrin.py b/tests/python/unittest/test_pass_lower_intrin.py
index 40252053013e4..f36b4a5d95fe6 100644
--- a/tests/python/unittest/test_pass_lower_intrin.py
+++ b/tests/python/unittest/test_pass_lower_intrin.py
@@ -22,8 +22,8 @@ def lower_intrin(stmt):
     """wrapper to call transformation in stmt"""
     lower_expr = isinstance(stmt, tvm.tir.PrimExpr)
     stmt = tvm.tir.Evaluate(stmt) if lower_expr else stmt
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt  = tvm.ir_pass._LowerIntrinStmt(stmt, "llvm")
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt  = tvm.tir.ir_pass._LowerIntrinStmt(stmt, "llvm")
     return stmt.value if lower_expr else stmt.body
 
 
diff --git a/tests/python/unittest/test_pass_lower_warp_memory.py b/tests/python/unittest/test_pass_lower_warp_memory.py
index 191d80128a8ef..266ca7eac99f5 100644
--- a/tests/python/unittest/test_pass_lower_warp_memory.py
+++ b/tests/python/unittest/test_pass_lower_warp_memory.py
@@ -34,8 +34,8 @@ def test_lower_warp_mem():
     s[AA].bind(xi, tx)
 
     f = tvm.lower(s, [A, B])
-    fhost, fdevice = tvm.ir_pass.SplitHostDevice(f)
-    fdevice = tvm.ir_pass.LowerWarpMemory(fdevice, 16)
+    fhost, fdevice = tvm.tir.ir_pass.SplitHostDevice(f)
+    fdevice = tvm.tir.ir_pass.LowerWarpMemory(fdevice, 16)
     assert(fdevice.body.body.value.value == "local")
     assert(fdevice.body.body.body.extents[0].value == 2)
 
diff --git a/tests/python/unittest/test_pass_makeapi.py b/tests/python/unittest/test_pass_makeapi.py
index d11acb5ccb9c5..6b28ef6cee189 100644
--- a/tests/python/unittest/test_pass_makeapi.py
+++ b/tests/python/unittest/test_pass_makeapi.py
@@ -32,10 +32,10 @@ def test_makeapi():
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
     Cb = tvm.tir.decl_buffer(C.shape, C.dtype, name='C')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64)
 
     num_unpacked_args = 2
-    f = tvm.ir_pass.MakeAPI(
+    f = tvm.tir.ir_pass.MakeAPI(
         stmt, "myadd", [n, Ab, Bb, Cb], num_unpacked_args, True)
     assert(f.handle_data_type[Ab.data].dtype == Ab.dtype)
     assert(len(f.args) == 7)
diff --git a/tests/python/unittest/test_pass_remove_no_op.py b/tests/python/unittest/test_pass_remove_no_op.py
index b2339417dc70e..c9ecfbe218715 100644
--- a/tests/python/unittest/test_pass_remove_no_op.py
+++ b/tests/python/unittest/test_pass_remove_no_op.py
@@ -36,16 +36,16 @@ def test_remove_no_op():
                 k, 0, m, 0, 0,
                 tvm.tir.IfThenElse(
                     (i*m+j+k < n), tvm.tir.Evaluate(m), tvm.tir.Evaluate(n)))))
-    ret = tvm.ir_pass.RemoveNoOp(stmt)
+    ret = tvm.tir.ir_pass.RemoveNoOp(stmt)
     assert(isinstance(ret, tvm.tir.Evaluate))
     store = tvm.tir.Store(Ab.data,
                            tvm.tir.Load(dtype, Ab.data, i) + 1,
                            i + 1)
     stmt2 = tvm.tir.SeqStmt([nop(), tvm.tir.SeqStmt([store, nop()])])
-    assert(tvm.ir_pass.RemoveNoOp(stmt2) == store)
+    assert(tvm.tir.ir_pass.RemoveNoOp(stmt2) == store)
     # remove zero extent loop
     stmt3 = tvm.tir.For(i, 0, 0, 0, 0, store)
-    ret = tvm.ir_pass.RemoveNoOp(stmt3)
+    ret = tvm.tir.ir_pass.RemoveNoOp(stmt3)
     assert(isinstance(ret, tvm.tir.Evaluate))
 
 
diff --git a/tests/python/unittest/test_pass_rewrite_unsafe_select.py b/tests/python/unittest/test_pass_rewrite_unsafe_select.py
index c6203d90d2aff..f1e411eab9d56 100644
--- a/tests/python/unittest/test_pass_rewrite_unsafe_select.py
+++ b/tests/python/unittest/test_pass_rewrite_unsafe_select.py
@@ -19,18 +19,18 @@
 
 
 def test_rewrite_Select():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.allocate("float32", 100, name="A", scope="global")
     i = te.var("i")
     y = tvm.tir.Select(i > 1, A[i-1], 1.0)
-    yy = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value
+    yy = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value
 
     z = tvm.tir.Select(
         tvm.tir.Select(i > 1, A[i-1], 1.0) > 0.0, A[i], 0.1)
-    zz = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value
+    zz = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value
 
     a = tvm.tir.Select(tvm.te.floordiv(i, 4) > 10, y, z)
-    aa = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value
+    aa = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value
     assert yy.name == "tvm_if_then_else"
     assert zz.name == "tvm_if_then_else"
     assert isinstance(aa, tvm.tir.Select)
diff --git a/tests/python/unittest/test_pass_storage_flatten.py b/tests/python/unittest/test_pass_storage_flatten.py
index e246c994239b9..e8a78cbc5209f 100644
--- a/tests/python/unittest/test_pass_storage_flatten.py
+++ b/tests/python/unittest/test_pass_storage_flatten.py
@@ -33,8 +33,8 @@ def test_flatten2():
 
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
 
 def test_flatten_prefetch():
     A = te.placeholder((25, 100, 4), name = 'A')
@@ -43,8 +43,8 @@ def test_flatten_prefetch():
     j = te.size_var('j')
     region = [tvm.ir.Range.make_by_min_extent(i[0], i[1]) for i in [(i, 2), (j, 8), (0, 4)]]
     stmt = tvm.tir.Prefetch(A.op, 0, A.dtype, region)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: _A}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: _A}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert stmt.extent.value == 2
     assert isinstance(stmt.body, tvm.tir.For)
     assert stmt.body.extent.value == 2
@@ -64,8 +64,8 @@ def test_flatten_storage_align():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert(stmt.body.extents[0].value == 17 * 8)
 
 def test_flatten_double_buffer():
@@ -73,7 +73,7 @@ def test_flatten_double_buffer():
     n = 100
     m = 4
     tx = te.thread_axis("threadIdx.x")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     C = ib.pointer("float32", name="C")
     ib.scope_attr(tx, "thread_extent", 1)
@@ -87,18 +87,18 @@ def test_flatten_double_buffer():
             C[j] = B[j] + 1
 
     stmt = ib.get()
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {}, 64)
-    stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2)
-    stmt = tvm.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {}, 64)
+    stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
     assert isinstance(stmt.body.body, tvm.tir.Allocate)
     assert stmt.body.body.extents[0].value == 2
-    f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
-    f = tvm.ir_pass.ThreadSync(f, "shared")
+    f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
+    f = tvm.tir.ir_pass.ThreadSync(f, "shared")
     count = [0]
     def count_sync(op):
         if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync":
             count[0] += 1
-    tvm.ir_pass.PostOrderVisit(f.body, count_sync)
+    tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync)
     assert count[0] == 4
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_storage_rewrite.py b/tests/python/unittest/test_pass_storage_rewrite.py
index 562df4e43d713..c74225d3a4be7 100644
--- a/tests/python/unittest/test_pass_storage_rewrite.py
+++ b/tests/python/unittest/test_pass_storage_rewrite.py
@@ -32,17 +32,17 @@ def test_storage_share():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     # verify only have one allocations.
     # verify inplace folding works
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
     assert num_alloc[0] == 1
 
 def register_mem(scope_tb, max_bits):
@@ -61,7 +61,7 @@ def test_alloc_seq():
 
     register_mem(scope_tb, max_bits)
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -72,18 +72,18 @@ def test_alloc_seq():
             A[j] = 1.3
 
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
             assert n.extents[0].value == 200
-    tvm.ir_pass.PostOrderVisit(body, verify)
+    tvm.tir.ir_pass.PostOrderVisit(body, verify)
     assert num_alloc[0] == 1
 
 def test_alloc_different_dtypes():
     def stmt_generater(dtype_list, length):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         base_dtype = dtype_list[0]
         global_a = te.placeholder((length,), name = "global_a", dtype = base_dtype)
         assert len(dtype_list) == 4
@@ -129,8 +129,8 @@ def verify(n):
 
         body = stmt_generater(dtype_list, length)
         offset = offset_generater(dtype_list, length)
-        body = tvm.ir_pass.StorageRewrite(body)
-        tvm.ir_pass.PostOrderVisit(body, verify)
+        body = tvm.tir.ir_pass.StorageRewrite(body)
+        tvm.tir.ir_pass.PostOrderVisit(body, verify)
 
     length = 1024
     dtype_list = ["float16", "int32", "uint16", "int8"]
@@ -159,17 +159,17 @@ def test_inplace_rule():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     # verify only have one allocations.
     # verify inplace folding works
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
     assert num_alloc[0] == 2
 
 
@@ -191,16 +191,16 @@ def test_storage_combine():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
             assert (n.extents[0].value == 16)
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
     assert num_alloc[0] == 1
 
 
@@ -225,22 +225,22 @@ def test_storage_share_gpu():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='A')
     Bb = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     alloc_stats = {"global": 0, "shared": 0}
 
     def verify(n):
         if isinstance(n, tvm.tir.AttrStmt):
             if n.attr_key == "storage_scope":
                 alloc_stats[n.value.value] += 1
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
     assert alloc_stats["global"] == 2
     assert alloc_stats["shared"] == num_stage
 
 def test_parallel_alloc():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i", for_type="parallel") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -248,10 +248,10 @@ def test_parallel_alloc():
             A[j] = A[j] + 2
 
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
     assert (isinstance(body.body.body, tvm.tir.Allocate))
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="t") as i:
         ib.scope_attr(
@@ -262,7 +262,7 @@ def test_parallel_alloc():
                 A = ib.allocate("float32", n, name="A", scope="global")
                 A[j] = A[j] + 2
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
 
     assert(isinstance(body.body.body.body.body, tvm.tir.Allocate))
 
@@ -288,17 +288,17 @@ def test_inplace_rule2(scope_tb = "local_TB2", max_bits = 1024 * 1024 * 1024):
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
     Cc = tvm.tir.decl_buffer(C.shape, B.dtype, name='C')
     Dd = tvm.tir.decl_buffer(D.shape, B.dtype, name='D')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     # verify only have one allocations.
     # verify inplace folding works
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
     assert num_alloc[0] == 2
 
 def test_exceed_mem():
@@ -381,19 +381,19 @@ def test_inplace_rule3():
     B5a = tvm.tir.decl_buffer(B5.shape, B5.dtype, name='B5')
 
     Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64)
-    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.StorageRewrite(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64)
+    stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
     # verify only have one allocations.
     # verify inplace folding works
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             assert n.extents[0].value == 70
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
 
 def test_alloc_seq_type():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -411,13 +411,13 @@ def test_alloc_seq_type():
             A2[j] = A[j]
 
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
             assert n.extents[0].value == 500
-    tvm.ir_pass.PostOrderVisit(body, verify)
+    tvm.tir.ir_pass.PostOrderVisit(body, verify)
     assert num_alloc[0] == 1
 
 def test_alloc_seq_type2():
@@ -426,7 +426,7 @@ def test_alloc_seq_type2():
 
     register_mem(scope_tb, max_bits)
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -440,18 +440,18 @@ def test_alloc_seq_type2():
             C[j] = 1.2
 
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
     num_alloc = [0]
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
             assert n.extents[0].value == 200
-    tvm.ir_pass.PostOrderVisit(body, verify)
+    tvm.tir.ir_pass.PostOrderVisit(body, verify)
     assert num_alloc[0] == 1
 
 
 def test_reuse_small_buffer():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.var("n")
     with ib.for_range(0, n, name="i") as i:
         with ib.for_range(0, 10, name="j") as j:
@@ -469,7 +469,7 @@ def test_reuse_small_buffer():
             E[j] = C[j]
 
     body = ib.get()
-    body = tvm.ir_pass.StorageRewrite(body)
+    body = tvm.tir.ir_pass.StorageRewrite(body)
 
     num_alloc = [0]
 
@@ -477,7 +477,7 @@ def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             num_alloc[0] += 1
             assert n.extents[0].value == 800
-    tvm.ir_pass.PostOrderVisit(body, verify)
+    tvm.tir.ir_pass.PostOrderVisit(body, verify)
     assert num_alloc[0] == 1
 
 def test_replace_dataflow():
@@ -515,7 +515,7 @@ def compute(a, b):
     def verify(n):
         if isinstance(n, tvm.tir.Allocate):
             assert n.extents[0].value == 268435456
-    tvm.ir_pass.PostOrderVisit(stmt, verify)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_storage_sync.py b/tests/python/unittest/test_pass_storage_sync.py
index c6c6b0fbdb391..9edfa95754355 100644
--- a/tests/python/unittest/test_pass_storage_sync.py
+++ b/tests/python/unittest/test_pass_storage_sync.py
@@ -36,11 +36,11 @@ def test_storage_sync():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
-    f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
-    flist = tvm.ir_pass.SplitHostDevice(f)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+    f = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
+    flist = tvm.tir.ir_pass.SplitHostDevice(f)
     f = flist[1]
-    f = tvm.ir_pass.ThreadSync(f, "shared")
+    f = tvm.tir.ir_pass.ThreadSync(f, "shared")
     body_list = tvm.tir.stmt_list(f.body.body.body.body)
     assert(body_list[1].value.name == "tvm_storage_sync")
 
@@ -54,7 +54,7 @@ def meminfo_cache():
             max_simd_bits=32,
             max_num_bits=128,
             head_address=tvm.tir.call_extern("handle", "global_cache"))
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
     cp = te.thread_axis((0, 1), "cop")
     A = ib.allocate("float32", 128, name="A", scope="global.cache")
@@ -65,7 +65,7 @@ def meminfo_cache():
                 ib.scope_attr(cp, "coproc_scope", 1)
                 A[j] = A[j + k * 10] + 2
     stmt = ib.get()
-    stmt = tvm.ir_pass.CoProcSync(stmt)
+    stmt = tvm.tir.ir_pass.CoProcSync(stmt)
     body = stmt.body.body.body
     blist = tvm.tir.stmt_list(body)
     assert(blist[1].value.name == "cop.coproc_read_barrier")
@@ -76,7 +76,7 @@ def meminfo_cache():
 
 
 def test_coproc_sync2():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
     cp = te.thread_axis((0, 1), "cop")
     ty = te.thread_axis("cthread")
@@ -93,7 +93,7 @@ def test_coproc_sync2():
             ib.scope_attr(cp, "coproc_scope", 2)
             A[ty] = 1.0
     stmt = ib.get()
-    stmt = tvm.ir_pass.CoProcSync(stmt)
+    stmt = tvm.tir.ir_pass.CoProcSync(stmt)
 
 def test_coproc_sync3():
     def __check_list(tvm_array, py_list):
@@ -102,7 +102,7 @@ def __check_list(tvm_array, py_list):
                 return False
         return True
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     n = te.size_var("n")
     cp = te.thread_axis((0, 1), "cop")
     A = ib.allocate("float32", 128, name="A", scope="global.cache")
@@ -119,7 +119,7 @@ def __check_list(tvm_array, py_list):
         A[0] = 0.0
 
     stmt = ib.get()
-    stmt = tvm.ir_pass.CoProcSync(stmt)
+    stmt = tvm.tir.ir_pass.CoProcSync(stmt)
     slist = tvm.tir.stmt_list(stmt[0].body.body)
     push_st = slist[2]
     slist = tvm.tir.stmt_list(slist[-1])
diff --git a/tests/python/unittest/test_pass_unroll.py b/tests/python/unittest/test_pass_unroll.py
index 8995395b64d24..165edab55f4e9 100644
--- a/tests/python/unittest/test_pass_unroll.py
+++ b/tests/python/unittest/test_pass_unroll.py
@@ -20,7 +20,7 @@
 
 
 def test_unroll_loop():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     dtype = 'int64'
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
@@ -32,28 +32,28 @@ def test_unroll_loop():
 
     stmt = ib.get()
     assert isinstance(stmt, tvm.tir.For)
-    ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, True)
+    ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, True)
     assert not isinstance(ret, tvm.tir.For)
-    ret = tvm.ir_pass.UnrollLoop(stmt, 15, 8, 0, True)
+    ret = tvm.tir.ir_pass.UnrollLoop(stmt, 15, 8, 0, True)
     assert isinstance(ret, tvm.tir.For)
-    ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, False)
+    ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, False)
     assert isinstance(ret, tvm.tir.For)
     assert ret.for_type == tvm.tir.For.Unrolled
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     ib.scope_attr(tvm.tir.const(0, "int32"), "pragma_auto_unroll_max_step", 16)
     ib.emit(stmt)
     wrapped = ib.get()
     wrapped = tvm.tir.SeqStmt([wrapped, stmt])
     assert isinstance(ret, tvm.tir.For)
-    ret = tvm.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False)
+    ret = tvm.tir.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False)
     assert isinstance(ret[0], tvm.tir.For)
     assert ret[0].for_type == tvm.tir.For.Unrolled
     assert isinstance(ret[1], tvm.tir.For)
     assert ret[1].for_type != tvm.tir.For.Unrolled
 
 def test_unroll_fake_loop():
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     dtype = 'int32'
     n = te.size_var('n')
     Ab = tvm.tir.decl_buffer((n, ), dtype)
@@ -65,7 +65,7 @@ def test_unroll_fake_loop():
             Aptr[j + 1] = Aptr[i] + 1
 
     stmt = ib.get()
-    ret = tvm.ir_pass.UnrollLoop(stmt, 8, 0, 1, True)
+    ret = tvm.tir.ir_pass.UnrollLoop(stmt, 8, 0, 1, True)
     assert isinstance(ret[0], tvm.tir.Store)
 
 def test_unroll_single_count_loops():
@@ -78,7 +78,7 @@ def test_unroll_single_count_loops():
     stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
     # all parameters to UnrolLoops are default values except for
     # auto_unroll_max_extent which has been set to 1 (default:0)
-    after_unroll_stmt = tvm.ir_pass.UnrollLoop(stmt, 0, 8, 1, True)
+    after_unroll_stmt = tvm.tir.ir_pass.UnrollLoop(stmt, 0, 8, 1, True)
     assert after_unroll_stmt == stmt
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_vectorize.py b/tests/python/unittest/test_pass_vectorize.py
index af545aab8648a..2ade843361c02 100644
--- a/tests/python/unittest/test_pass_vectorize.py
+++ b/tests/python/unittest/test_pass_vectorize.py
@@ -20,7 +20,7 @@
 def test_vectorize_loop():
     dtype = 'int64'
     n = te.var('n')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, n) as i:
         with ib.for_range(0, 4, for_type="vectorize") as j:
@@ -28,7 +28,7 @@ def test_vectorize_loop():
     stmt = ib.get()
 
     assert isinstance(stmt.body, tvm.tir.For)
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.For)
     assert not isinstance(stmt.body, tvm.tir.For)
     assert isinstance(stmt.body.index, tvm.tir.Ramp)
@@ -37,14 +37,14 @@ def test_vectorize_loop():
 def test_vectorize_vector():
     dtype = 'int64'
     n = te.var('n')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32x4", name="A")
     with ib.for_range(0, n) as i:
         with ib.for_range(0, 4, for_type="vectorize") as j:
             A[j] = tvm.tir.const(1, A.dtype)
     stmt = ib.get()
     assert isinstance(stmt.body, tvm.tir.For)
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.For)
     assert not isinstance(stmt.body, tvm.tir.For)
     assert isinstance(stmt.body.index, tvm.tir.Ramp)
@@ -54,7 +54,7 @@ def test_vectorize_vector():
 def test_vectorize_with_if():
     n = te.var('n')
     x = te.var('x')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, 4, for_type="vectorize") as i:
         with ib.if_scope(x < n):
@@ -63,7 +63,7 @@ def test_vectorize_with_if():
             with ib.if_scope(i < n):
                 A[i] = 2.0
     stmt = ib.get()
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.IfThenElse)
     assert isinstance(stmt.then_case.index, tvm.tir.Ramp)
     assert isinstance(stmt.then_case.value, tvm.tir.Add)
@@ -72,41 +72,41 @@ def test_vectorize_with_if():
 
 def test_vectorize_with_le_cond():
     n = te.var('n')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, 4, for_type="vectorize") as i:
         with ib.if_scope(i <= n):
             A[i] = A[i] + 1
     stmt = ib.get()
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.For)
 
 def test_vectorize_with_ge_cond():
     n = te.var('n')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, 4, for_type="vectorize") as i:
         with ib.if_scope(i >= n):
             A[i] = A[i] + 1
     stmt = ib.get()
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.For)
 
 def test_vectorize_if_then_else():
     n = te.var('n')
     x = te.var('x')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, 4, for_type="vectorize") as i:
         A[i] = tvm.tir.call_intrin("float32", "tvm_if_then_else",
                                i > 0,
                                A[i] + 1, A[i])
     stmt = ib.get()
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert isinstance(stmt, tvm.tir.For)
 
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.pointer("float32", name="A")
     with ib.for_range(0, n) as k:
         with ib.for_range(0, 4, for_type="vectorize") as i:
@@ -115,7 +115,7 @@ def test_vectorize_if_then_else():
                                            A[k * 4 + i], 0)
     stmt = ib.get()
     assert isinstance(stmt.body, tvm.tir.For)
-    stmt = tvm.ir_pass.VectorizeLoop(stmt)
+    stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
     assert not isinstance(stmt.body, tvm.tir.For)
     assert isinstance(stmt.body.value.args[2], tvm.tir.Broadcast)
 
diff --git a/tests/python/unittest/test_pass_verify_gpu_code.py b/tests/python/unittest/test_pass_verify_gpu_code.py
index 724165385d809..6e138a29b3e97 100644
--- a/tests/python/unittest/test_pass_verify_gpu_code.py
+++ b/tests/python/unittest/test_pass_verify_gpu_code.py
@@ -20,7 +20,7 @@
 
 def get_verify_pass(valid, **kwargs):
     def verify_pass(stmt):
-        valid[0] = tvm.ir_pass.VerifyGPUCode(stmt, kwargs)
+        valid[0] = tvm.tir.ir_pass.VerifyGPUCode(stmt, kwargs)
         return stmt
     return verify_pass
 
@@ -49,14 +49,14 @@ def check_shared_memory(dtype):
             if not tvm.context(target).exist:
                 continue
             valid = [None]
-            with tvm.build_config(**{"add_lower_pass": [
+            with tvm.target.build_config(**{"add_lower_pass": [
                 (2, get_verify_pass(valid,
                                     max_shared_memory_per_block=type_size * M - 1,
                                     max_threads_per_block=M))]}):
                 tvm.build(s, [A, B], target)
             assert not valid[0]
 
-            with tvm.build_config(**{"add_lower_pass": [
+            with tvm.target.build_config(**{"add_lower_pass": [
                 (2, get_verify_pass(valid,
                                     max_shared_memory_per_block=type_size * M,
                                     max_threads_per_block=M))]}):
@@ -86,14 +86,14 @@ def test_local_memory():
             continue
 
         valid = [None]
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_local_memory_per_block=4 * M - 1,
                                 max_threads_per_block=1))]}):
             tvm.build(s, [A, B], target)
         assert not valid[0]
 
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_local_memory_per_block=4 * M,
                                 max_threads_per_block=1))]}):
@@ -121,21 +121,21 @@ def test_num_thread():
             continue
 
         valid = [None]
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N - 1))]}):
             tvm.build(s, [A, B], target)
         assert not valid[0]
 
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N))]}):
             tvm.build(s, [A, B], target)
         assert valid[0]
 
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N,
@@ -143,7 +143,7 @@ def test_num_thread():
             tvm.build(s, [A, B], target)
         assert not valid[0]
 
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N,
@@ -171,14 +171,14 @@ def test_multiple_kernels():
             continue
 
         valid = [None]
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N - 1))]}):
             tvm.build(s, [A, C], target)
         assert not valid[0]
 
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
             (2, get_verify_pass(valid,
                                 max_shared_memory_per_block=0,
                                 max_threads_per_block=N))]}):
@@ -202,7 +202,7 @@ def test_wrong_bind():
             continue
 
         valid = [None]
-        with tvm.build_config(**{"add_lower_pass": [
+        with tvm.target.build_config(**{"add_lower_pass": [
                 (2, get_verify_pass(valid, max_threads_per_block=N*N))]}):
             tvm.build(s, [A, B], target)
         assert not valid[0]
diff --git a/tests/python/unittest/test_pass_verify_memory.py b/tests/python/unittest/test_pass_verify_memory.py
index 336f341d61cfd..3747caed1586a 100644
--- a/tests/python/unittest/test_pass_verify_memory.py
+++ b/tests/python/unittest/test_pass_verify_memory.py
@@ -37,9 +37,9 @@ def lower(sch, args):
     sch = sch.normalize()
     bounds = tvm.te.schedule.InferBound(sch)
     stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
-    stmt = tvm.ir_pass.LoopPartition(stmt, False)
-    stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64)
-    func = tvm.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True)
+    stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64)
+    func = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True)
     return func
 
 
@@ -60,7 +60,7 @@ def test_verify_memory_all_bind():
   func = lower(s, [A, B])
 
   for dev_type in gpu_devices + other_devices:
-    assert tvm.ir_pass.VerifyMemory(func, dev_type)
+    assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
 
 
 # Computations are not bound.
@@ -77,9 +77,9 @@ def test_verify_memory_not_bind():
   func = lower(s, [A, B])
 
   for dev_type in gpu_devices:
-    assert not tvm.ir_pass.VerifyMemory(func, dev_type)
+    assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type)
   for dev_type in other_devices:
-    assert tvm.ir_pass.VerifyMemory(func, dev_type)
+    assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
 
 
 # Computations are partially bound.
@@ -101,9 +101,9 @@ def test_verify_memory_partially_bind():
   func = lower(s, [A, B, C, D])
 
   for dev_type in gpu_devices:
-    assert not tvm.ir_pass.VerifyMemory(func, dev_type)
+    assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type)
   for dev_type in other_devices:
-    assert tvm.ir_pass.VerifyMemory(func, dev_type)
+    assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_pass_virtual_thread.py b/tests/python/unittest/test_pass_virtual_thread.py
index a6675ffe9ba11..2d96696eed882 100644
--- a/tests/python/unittest/test_pass_virtual_thread.py
+++ b/tests/python/unittest/test_pass_virtual_thread.py
@@ -36,9 +36,9 @@ def test_virtual_thread():
 
     Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
     A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
-    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
-    stmt = tvm.ir_pass.Simplify(stmt)
-    stmt = tvm.ir_pass.InjectVirtualThread(stmt)
+    stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+    stmt = tvm.tir.ir_pass.Simplify(stmt)
+    stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt)
     print(stmt)
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_runtime_extension.py b/tests/python/unittest/test_runtime_extension.py
index 1dd9bc8a19f2a..375b99b0ad315 100644
--- a/tests/python/unittest/test_runtime_extension.py
+++ b/tests/python/unittest/test_runtime_extension.py
@@ -33,13 +33,13 @@ def test_dltensor_compatible():
     n = te.var('n')
     Ab = tvm.tir.decl_buffer((n,), dtype)
     i = te.var('i')
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     A = ib.buffer_ptr(Ab)
     with ib.for_range(0, n - 1, "i") as i:
         A[i + 1] = A[i] + 1
     stmt = ib.get()
-    fapi = tvm.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True)
-    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+    fapi = tvm.tir.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True)
+    fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
     f = tvm.target.codegen.build_module(fapi, "stackvm")
     a = tvm.nd.array(np.zeros(10, dtype=dtype))
     aview = MyTensorView(a)
diff --git a/tests/python/unittest/test_runtime_micro.py b/tests/python/unittest/test_runtime_micro.py
index 9e39898fa084e..28fdb11c3de4b 100644
--- a/tests/python/unittest/test_runtime_micro.py
+++ b/tests/python/unittest/test_runtime_micro.py
@@ -47,7 +47,7 @@ def relay_micro_build(func, dev_config, params=None):
     mod : tvm.runtime.Module
         graph runtime module for the target device
     """
-    with tvm.build_config(disable_vectorize=True):
+    with tvm.target.build_config(disable_vectorize=True):
         graph, c_mod, params = relay.build(func, target="c", params=params)
     micro_mod = create_micro_mod(c_mod, dev_config)
     ctx = tvm.micro_dev(0)
diff --git a/tests/python/unittest/test_runtime_module_load.py b/tests/python/unittest/test_runtime_module_load.py
index dfe03dbbec388..e7771e3c6721c 100644
--- a/tests/python/unittest/test_runtime_module_load.py
+++ b/tests/python/unittest/test_runtime_module_load.py
@@ -57,8 +57,8 @@ def save_object(names):
             tvm.tir.Store(Ab.data,
                            tvm.tir.Load(dtype, Ab.data, i) + 1,
                            i + 1))
-        fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
-        fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+        fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+        fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
         m = tvm.target.codegen.build_module(fapi, "llvm")
         for name in names:
             m.save(name)
diff --git a/tests/python/unittest/test_schedule_bound_inference.py b/tests/python/unittest/test_schedule_bound_inference.py
index abb8d7e146515..484aa503e0667 100644
--- a/tests/python/unittest/test_schedule_bound_inference.py
+++ b/tests/python/unittest/test_schedule_bound_inference.py
@@ -113,7 +113,7 @@ def test_bound_fusesplit1():
     bounds = tvm.te.schedule.InferBound(s)
     assert isinstance(bounds, tvm.container.Map)
     idxdiv = tvm.tir.indexdiv
-    assert(tvm.ir_pass.Simplify(
+    assert(tvm.tir.ir_pass.Simplify(
             bounds[A1.op.axis[0]].min - idxdiv(xo * split1, l)).value == 0)
 
     expected_extent = (idxdiv((xo + 1) * split1 - 1, l) - idxdiv(xo * split1, l) + 1)
@@ -121,11 +121,11 @@ def test_bound_fusesplit1():
         for j in range(1, 6):
             for k in range(1, 6):
                 vars = tvm.runtime.convert({split1: tvm.tir.const(i, "int32"), l: tvm.tir.const(j, "int32"), xo.var: tvm.tir.const(k, "int32")})
-                comp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value
-                exp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expected_extent, vars)).value
+                comp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value
+                exp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(expected_extent, vars)).value
                 assert(comp_ext == exp_ext)
 
-    assert(tvm.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0)
+    assert(tvm.tir.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0)
 
 def test_bound_fusesplit2():
     m = te.var("m")
@@ -143,10 +143,10 @@ def test_bound_fusesplit2():
     bounds = tvm.te.schedule.InferBound(s)
     assert isinstance(bounds, tvm.container.Map)
     vars = tvm.runtime.convert({xo.var: tvm.tir.const(5, "int32")})
-    assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2)
-    assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3)
-    assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1)
-    assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3)
+    assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2)
+    assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3)
+    assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1)
+    assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3)
 
 
 def test_bound_warp():
@@ -369,12 +369,12 @@ def intrin_func(ins, outs):
         aa = ins[0]
         cc = outs[0]
         def _body():
-          ib = tvm.ir_builder.create()
+          ib = tvm.tir.ir_builder.create()
           ib.emit(tvm.tir.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r")))
           return ib.get()
         return _body()
-      with tvm.build_config(offset_factor=1):
-        return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb})
+      with tvm.target.build_config(offset_factor=1):
+        return te.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb})
 
     test_func = intrin_test()
     A = te.placeholder((20,20), name='A')
diff --git a/tests/python/unittest/test_schedule_schedule_ops.py b/tests/python/unittest/test_schedule_schedule_ops.py
index d83dc96032152..8d10ceea0b48a 100644
--- a/tests/python/unittest/test_schedule_schedule_ops.py
+++ b/tests/python/unittest/test_schedule_schedule_ops.py
@@ -145,7 +145,7 @@ def test_inline_mixed():
     def check(x):
         if isinstance(x, tvm.tir.Call):
             assert x.func != A2
-    tvm.ir_pass.PostOrderVisit(s[C].op.body[0], check)
+    tvm.tir.ir_pass.PostOrderVisit(s[C].op.body[0], check)
 
 
 def test_scan_inline1():
@@ -311,9 +311,9 @@ def intrin_func(ins, outs):
             "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
         return body, reset, update
 
-    with tvm.build_config(data_alignment=16,
+    with tvm.target.build_config(data_alignment=16,
                           offset_factor=16):
-        return tvm.decl_tensor_intrin(z.op, intrin_func,
+        return te.decl_tensor_intrin(z.op, intrin_func,
                                       binds={w: Wb})
 
 
@@ -363,12 +363,12 @@ def create_buffer(t):
         binds[z] = create_buffer(z)
 
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
         return ib.get()
 
-    with tvm.build_config(offset_factor=16):
-        return tvm.decl_tensor_intrin(z.op, intrin_func, binds=binds)
+    with tvm.target.build_config(offset_factor=16):
+        return te.decl_tensor_intrin(z.op, intrin_func, binds=binds)
 
 
 def test_schedule_tensor_compute2():
diff --git a/tests/python/unittest/test_schedule_tensor_core.py b/tests/python/unittest/test_schedule_tensor_core.py
index 5dbe04f93d385..ae2301caffa33 100644
--- a/tests/python/unittest/test_schedule_tensor_core.py
+++ b/tests/python/unittest/test_schedule_tensor_core.py
@@ -35,7 +35,7 @@ def intrin_wmma_load_matrix(shape, scope):
     BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col)
 
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
 
         BA = ins[0]
         BC = outs[0]
@@ -44,7 +44,7 @@ def intrin_func(ins, outs):
                                 BA.access_ptr('r'), col, 'row_major'))
         return ib.get()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
 
 
 def intrin_wmma_gemm(shape):
@@ -65,12 +65,12 @@ def intrin_func(ins, outs):
         BC, = outs
 
         def init():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0))
             return ib.get()
 
         def update():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
                                     BC.data, BC.elem_offset // (n * m),
                                     BA.data, BA.elem_offset // (n * l),
@@ -80,7 +80,7 @@ def update():
 
         return update(), init(), update()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
 
 
 def intrin_wmma_store_matrix(shape):
@@ -91,7 +91,7 @@ def intrin_wmma_store_matrix(shape):
     BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m)
 
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
 
         BA = ins[0]
         BC = outs[0]
@@ -100,7 +100,7 @@ def intrin_func(ins, outs):
                                 BC.access_ptr('w'), m, 'row_major'))
         return ib.get()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
 
 
 def test_tensor_core_batch_matmal():
diff --git a/tests/python/unittest/test_schedule_tensorize.py b/tests/python/unittest/test_schedule_tensorize.py
index c23a878c5a397..28a3ae875fc7e 100644
--- a/tests/python/unittest/test_schedule_tensorize.py
+++ b/tests/python/unittest/test_schedule_tensorize.py
@@ -25,8 +25,8 @@ def intrin_func(ins, outs):
         xx, yy = ins
         zz = outs[0]
         return tvm.tir.call_packed("vadd", xx, yy, zz)
-    with tvm.build_config(offset_factor=16):
-        return tvm.decl_tensor_intrin(z.op, intrin_func)
+    with tvm.target.build_config(offset_factor=16):
+        return te.decl_tensor_intrin(z.op, intrin_func)
 
 def intrin_gemv(m, n):
     w = te.placeholder((m, n), name='w')
@@ -52,9 +52,9 @@ def intrin_func(ins, outs):
             "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
         return body, reset, update
 
-    with tvm.build_config(data_alignment=16,
+    with tvm.target.build_config(data_alignment=16,
                           offset_factor=16):
-        return tvm.decl_tensor_intrin(z.op, intrin_func,
+        return te.decl_tensor_intrin(z.op, intrin_func,
                                       binds={w: Wb})
 
 def intrin_gemv_no_reset(m, n):
@@ -79,9 +79,9 @@ def intrin_func(ins, outs):
             "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
         return body, None, update
 
-    with tvm.build_config(data_alignment=16,
+    with tvm.target.build_config(data_alignment=16,
                           offset_factor=16):
-        return tvm.decl_tensor_intrin(z.op, intrin_func,
+        return te.decl_tensor_intrin(z.op, intrin_func,
                                       binds={w: Wb})
 
 
@@ -100,13 +100,13 @@ def check(factor):
         dom_map = tvm.te.schedule.InferBound(s)
         finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
         out_dom, in_dom = finfer(s[z], dom_map)
-        assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor)
-        assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor)
-        assert tvm.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor)
+        assert tvm.tir.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor)
         fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
         body = fmatch(s[z], out_dom, in_dom, vadd)
-        assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
-                                 tvm.ir_pass.CanonicalSimplify(vadd.op.body[0]))
+        assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+                                 tvm.tir.ir_pass.CanonicalSimplify(vadd.op.body[0]))
         stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
         tvm.lower(s, [x, y, z])
 
@@ -133,13 +133,13 @@ def check(factor):
         dom_map = tvm.te.schedule.InferBound(s)
         finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
         out_dom, in_dom = finfer(s[C], dom_map)
-        assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
-        assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
-        assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
         fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
         body = fmatch(s[C], out_dom, in_dom, gemv)
-        assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
-                                 tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+        assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+                                 tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
         stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
         tvm.lower(s, [A, B, C])
 
@@ -157,13 +157,13 @@ def check_rfactor(factor, rfactor):
         dom_map = tvm.te.schedule.InferBound(s)
         finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
         out_dom, in_dom = finfer(s[C], dom_map)
-        assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
-        assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
-        assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
         fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
         body = fmatch(s[C], out_dom, in_dom, gemv)
-        assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
-                                 tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+        assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+                                 tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
         stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
         tvm.lower(s, [A, B, C])
 
@@ -180,13 +180,13 @@ def check_rfactor_no_reset(factor, rfactor):
         dom_map = tvm.te.schedule.InferBound(s)
         finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
         out_dom, in_dom = finfer(s[C], dom_map)
-        assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
-        assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
-        assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
         fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
         body = fmatch(s[C], out_dom, in_dom, gemv)
-        assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
-                                 tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+        assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+                                 tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
         stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
         tvm.lower(s, [A, B, C])
 
@@ -204,13 +204,13 @@ def check_rfactor_no_reset_multi_reduction(factor, rfactor):
         dom_map = tvm.te.schedule.InferBound(s)
         finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
         out_dom, in_dom = finfer(s[C], dom_map)
-        assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
-        assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
-        assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+        assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
         fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
         body = fmatch(s[C], out_dom, in_dom, gemv)
-        assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
-                                 tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+        assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+                                 tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
         stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
         tvm.lower(s, [A, B, C])
 
@@ -237,8 +237,8 @@ def intrin_func(ins, outs):
             zz = outs[0]
             return tvm.tir.call_packed("op", xx, zz)
 
-        with tvm.build_config(offset_factor=2):
-            return tvm.decl_tensor_intrin(y.op, intrin_func)
+        with tvm.target.build_config(offset_factor=2):
+            return te.decl_tensor_intrin(y.op, intrin_func)
 
     A = te.placeholder((5, 5), name='A')
     B = te.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)])
@@ -275,8 +275,8 @@ def intrin_multivadd(n):
         def intrin_func(ins, outs):
             return tvm.tir.call_packed("multivadd")
 
-        with tvm.build_config():
-            return tvm.decl_tensor_intrin(z.op, intrin_func, name="multivadd")
+        with tvm.target.build_config():
+            return te.decl_tensor_intrin(z.op, intrin_func, name="multivadd")
 
     def intrin_vadd(n):
         dtype = 'float32'
@@ -291,14 +291,14 @@ def create_buffer(t):
                                    offset_factor=16)
 
         def intrin_func(ins, outs):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_extern("float32", 'vadd',
                                     ins[0].access_ptr("r"), ins[1].access_ptr('r'),
                                     outs[0].access_ptr('wr')))
             return ib.get()
 
-        with tvm.build_config(offset_factor=16):
-            return tvm.decl_tensor_intrin(z.op, intrin_func, binds={x: create_buffer(x),
+        with tvm.target.build_config(offset_factor=16):
+            return te.decl_tensor_intrin(z.op, intrin_func, binds={x: create_buffer(x),
                                                                     y: create_buffer(y),
                                                                     z: create_buffer(z)})
 
diff --git a/tests/webgl/test_local_topi_conv2d_nchw.py b/tests/webgl/test_local_topi_conv2d_nchw.py
index 484143a1cdd52..0d9b7776096a2 100644
--- a/tests/webgl/test_local_topi_conv2d_nchw.py
+++ b/tests/webgl/test_local_topi_conv2d_nchw.py
@@ -60,7 +60,7 @@ def check_device(device):
         w = tvm.nd.array(w_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
-        with tvm.build_config(auto_unroll_max_step=1400,
+        with tvm.target.build_config(auto_unroll_max_step=1400,
                               unroll_explicit=(device != "cuda")):
             func1 = tvm.build(s1, [A, W, B], device, name="conv2d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
             func2 = tvm.build(s2, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
diff --git a/topi/python/topi/arm_cpu/bitserial_conv2d.py b/topi/python/topi/arm_cpu/bitserial_conv2d.py
index 1f6f68c37b4b1..bdda496f8fb81 100644
--- a/topi/python/topi/arm_cpu/bitserial_conv2d.py
+++ b/topi/python/topi/arm_cpu/bitserial_conv2d.py
@@ -214,7 +214,7 @@ def _intrin_func(ins, outs):
             return_dtype = 'uint16x8'
 
         def _instr(index):
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             if index == 1: # reduce reset
                 irb.emit(zz.vstore(0, tvm.tir.const(0, return_dtype)))
                 return irb.get()
@@ -271,8 +271,8 @@ def _instr(index):
             return irb.get()
         # body, reset, update
         return _instr(0), _instr(1), _instr(2)
-    with tvm.build_config(offset_factor=1, partition_const_loop=True):
-        return tvm.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb})
+    with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+        return te.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb})
 
 # ARM specific schedule that using custom microkernel
 def _schedule_spatial_conv2d_nhwc(cfg, s, data_pad, data_vec, kernel_vec,
diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py
index 72ed4da510bd8..25b338e06b5f8 100644
--- a/topi/python/topi/arm_cpu/conv2d.py
+++ b/topi/python/topi/arm_cpu/conv2d.py
@@ -345,13 +345,13 @@ def _conv2d_arm_cpu_winograd_nnpack(
     cfg.define_knob('winograd_nnpack_algorithm', [convolution_algorithm])
 
     assert N == 1
-    with tvm.tag_scope("winograd_nnpack_conv2d_weight_transform"):
+    with tvm.te.tag_scope("winograd_nnpack_conv2d_weight_transform"):
         transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform(
             kernel, algorithm=cfg['winograd_nnpack_algorithm'].val)
         if autotvm.GLOBAL_SCOPE.in_tuning:
             transformed_kernel = te.compute(transformed_kernel.shape, lambda *args: 0.0)
 
-    with tvm.tag_scope("winograd_nnpack_conv2d_output"):
+    with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
         output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
             data, transformed_kernel,
             bias=None,
@@ -399,7 +399,7 @@ def conv2d_nchw_winograd_nnpack_without_weight_transform(
     W = (IW + pl + pr - 3) // WSTR + 1
 
     assert N == 1
-    with tvm.tag_scope("winograd_nnpack_conv2d_output"):
+    with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
         output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
             data=data,
             transformed_kernel=transformed_kernel,
diff --git a/topi/python/topi/arm_cpu/tensor_intrin.py b/topi/python/topi/arm_cpu/tensor_intrin.py
index 7a656b8af0404..135c87d595117 100644
--- a/topi/python/topi/arm_cpu/tensor_intrin.py
+++ b/topi/python/topi/arm_cpu/tensor_intrin.py
@@ -76,7 +76,7 @@ def dot_int8_int8_int32(int32_lanes, dtype='uint'):
 
     def _intrin_func(ins, outs):
         def _instr(index):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             if index == 1:
                 ib.emit(outs[0].vstore(0, tvm.tir.const(0, '%s32x%d' % (dtype, int32_lanes))))
                 return ib.get()
@@ -107,5 +107,5 @@ def _instr(index):
         # body, reset, update
         return _instr(0), _instr(1), _instr(2)
 
-    with tvm.build_config(offset_factor=1, partition_const_loop=True):
-        return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+    with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+        return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
diff --git a/topi/python/topi/cuda/depthwise_conv2d.py b/topi/python/topi/cuda/depthwise_conv2d.py
index de11dc6683691..db9da844e3afe 100644
--- a/topi/python/topi/cuda/depthwise_conv2d.py
+++ b/topi/python/topi/cuda/depthwise_conv2d.py
@@ -167,7 +167,7 @@ def _schedule(temp, Filter, DepthwiseConv2d):
         b, h, w, c = s[Output].op.axis
 
         # num_thread here could be 728, it is larger than cuda.max_num_threads
-        num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
+        num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value
         target = tvm.target.Target.current()
         if target and (target.target_name not in ["cuda", "nvptx"]):
             num_thread = target.max_num_threads
diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py
index 2b3f38e79eeab..e008dcdb1ce24 100644
--- a/topi/python/topi/cuda/nms.py
+++ b/topi/python/topi/cuda/nms.py
@@ -78,7 +78,7 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor
     num_anchors = data.shape[1]
     elem_length = data.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     data = ib.buffer_ptr(data)
 
@@ -141,7 +141,7 @@ def flag_scan(flag, prefix_sum):
     batch_size = flag.shape[0]
     num_anchors = flag.shape[1]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     flag = ib.buffer_ptr(flag)
     prefix_sum = ib.buffer_ptr(prefix_sum)
@@ -201,7 +201,7 @@ def out_rewrite(data, flag, prefix_sum, valid_count, out):
     num_anchors = out.shape[1]
     elem_length = out.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     one = tvm.tir.const(1, dtype=out.dtype)
     data = ib.buffer_ptr(data)
@@ -373,7 +373,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
     num_anchors = data.shape[1]
     box_data_length = data.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     data = ib.buffer_ptr(data)
     sorted_index = ib.buffer_ptr(sorted_index)
@@ -498,7 +498,7 @@ def invalid_to_bottom_pre(data, flag, idx):
     num_anchors = data.shape[1]
     elem_length = data.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     data = ib.buffer_ptr(data)
     flag = ib.buffer_ptr(flag)
@@ -557,7 +557,7 @@ def invalid_to_bottom_ir(data, flag, idx, out):
     num_anchors = data.shape[1]
     elem_length = data.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     data = ib.buffer_ptr(data)
     flag = ib.buffer_ptr(flag)
diff --git a/topi/python/topi/cuda/rcnn/proposal.py b/topi/python/topi/cuda/rcnn/proposal.py
index 03907a50c0af8..3546448cd3063 100644
--- a/topi/python/topi/cuda/rcnn/proposal.py
+++ b/topi/python/topi/cuda/rcnn/proposal.py
@@ -71,7 +71,7 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r
     tx = te.thread_axis("threadIdx.x")
     bx = te.thread_axis("blockIdx.x")
     tid = bx * max_threads + tx
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     ib.scope_attr(tx, "thread_extent", nthread_tx)
     ib.scope_attr(bx, "thread_extent", nthread_bx)
 
@@ -154,7 +154,7 @@ def argsort_ir(data_buf, out_index_buf):
     """
     batch, num_bbox = get_const_tuple(data_buf.shape)
     max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     p_data = ib.buffer_ptr(data_buf)
     index_out = ib.buffer_ptr(out_index_buf)
     nthread_tx = max_threads
@@ -229,7 +229,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
     max_threads = int(math.sqrt(tvm.target.Target.current(allow_none=False).max_num_threads))
     tx = te.thread_axis("threadIdx.x")
     bx = te.thread_axis("blockIdx.x")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     p_data = ib.buffer_ptr(sorted_bbox_buf)
     p_out = ib.buffer_ptr(out_buf)
     nthread_tx = max_threads
@@ -277,7 +277,7 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf):
     rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch
     nthread_tx = batch
     tx = te.thread_axis("threadIdx.x")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     ib.scope_attr(tx, "thread_extent", nthread_tx)
     i = ib.allocate('int32', (1,), 'i', scope='local')
     i[0] = 0
diff --git a/topi/python/topi/cuda/sort.py b/topi/python/topi/cuda/sort.py
index c1c622ff1d30a..f9e535e133fac 100644
--- a/topi/python/topi/cuda/sort.py
+++ b/topi/python/topi/cuda/sort.py
@@ -86,7 +86,7 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None):
         elif i > axis:
             axis_mul_after *= value
     max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     data = ib.buffer_ptr(data)
     values_out = ib.buffer_ptr(values_out)
     if indices_out is not None:
@@ -110,7 +110,7 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None):
                 values_out[base_idx + tid * axis_mul_after] = data[base_idx + tid * axis_mul_after]
                 if indices_out is not None:
                     indices_out[base_idx + tid * axis_mul_after] = \
-                        tvm.generic.cast(tid, indices_out.dtype)
+                        tvm.tir.generic.cast(tid, indices_out.dtype)
     ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
                          tvm.runtime.convert(['shared']),
                          tvm.tir.Call.Intrinsic, None, 0))
@@ -185,7 +185,7 @@ def sort_nms_ir(data, valid_count, output, axis, is_ascend):
         elif i > axis:
             axis_mul_after *= value
     max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     data = ib.buffer_ptr(data)
     valid_count = ib.buffer_ptr(valid_count)
     output = ib.buffer_ptr(output)
diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py
index 9714194271f05..30784f45a5914 100644
--- a/topi/python/topi/cuda/ssd/multibox.py
+++ b/topi/python/topi/cuda/ssd/multibox.py
@@ -60,7 +60,7 @@ def multibox_prior_ir(data, out, sizes, ratios, steps, offsets):
     ty = te.thread_axis("threadIdx.y")
     bx = te.thread_axis("blockIdx.x")
     by = te.thread_axis("blockIdx.y")
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     p_out = ib.buffer_ptr(out)
     in_height = data.shape[2]
     in_width = data.shape[3]
@@ -180,7 +180,7 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp
     num_classes = cls_prob.shape[1]
     num_anchors = cls_prob.shape[2]
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     cls_prob = ib.buffer_ptr(cls_prob)
     cls_id = ib.buffer_ptr(temp_cls_id)
@@ -292,7 +292,7 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw,
             tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox + ow)), ox + ow), \
             tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy + oh)), oy + oh)
 
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     loc_pred = ib.buffer_ptr(loc_pred)
     anchor = ib.buffer_ptr(anchor)
diff --git a/topi/python/topi/cuda/tensor_intrin.py b/topi/python/topi/cuda/tensor_intrin.py
index 214ceec0faf61..468e2cd21fa8e 100644
--- a/topi/python/topi/cuda/tensor_intrin.py
+++ b/topi/python/topi/cuda/tensor_intrin.py
@@ -56,7 +56,7 @@ def _instr(index):
             if index == 1:
                 return zz.vstore(0, 0)
 
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
 
             vec_x = xx.vload(0, dtype='int8x4')
             vec_y = yy.vload(0, dtype='int8x4')
@@ -69,11 +69,11 @@ def _instr(index):
 
         return _instr(0), _instr(1), _instr(2) # body, reset, update
 
-    with tvm.build_config(data_alignment=4, offset_factor=1) as cfg:
+    with tvm.target.build_config(data_alignment=4, offset_factor=1) as cfg:
         scopes = {x: x_scope, y: y_scope, z: z_scope}
         binds = {t: tvm.tir.decl_buffer(t.shape, t.dtype, t.op.name,
                                         data_alignment=cfg.data_alignment,
                                         offset_factor=cfg.offset_factor,
                                         scope=scopes[t]) for t in [x, y, z]}
 
-        return tvm.decl_tensor_intrin(z.op, _intrin_func, binds=binds)
+        return te.decl_tensor_intrin(z.op, _intrin_func, binds=binds)
diff --git a/topi/python/topi/generic/__init__.py b/topi/python/topi/generic/__init__.py
index b9db1560e5883..bf45bc30a42d4 100644
--- a/topi/python/topi/generic/__init__.py
+++ b/topi/python/topi/generic/__init__.py
@@ -28,7 +28,7 @@
 
   # create schedule that dispatches to topi.cuda.schedule_injective
   with tvm.target.create("cuda"):
-    s = tvm.generic.schedule_injective(outs)
+    s = tvm.tir.generic.schedule_injective(outs)
 """
 from __future__ import absolute_import as _abs
 
diff --git a/topi/python/topi/generic_op_impl.py b/topi/python/topi/generic_op_impl.py
index 864bd936a268f..f4695d3db3ee3 100644
--- a/topi/python/topi/generic_op_impl.py
+++ b/topi/python/topi/generic_op_impl.py
@@ -90,12 +90,12 @@ def _bind_generic_ops():
     """Bind generic operators for Tensor."""
     # Check __op_priority__ to make sure the binding happens only once.
     __op_priority__ = 1
-    if __op_priority__ > tvm.generic.__op_priority__:
-        tvm.generic.__op_priority__ = __op_priority__
-        tvm.generic.add = _make_bop(_broadcast.add, tvm.generic.add)
-        tvm.generic.subtract = _make_bop(_broadcast.subtract, tvm.generic.subtract)
-        tvm.generic.multiply = _make_bop(_broadcast.multiply, tvm.generic.multiply)
-        tvm.generic.divide = _make_bop(_broadcast.divide, tvm.generic.divide)
-        tvm.generic.cast = _math.cast
+    if __op_priority__ > tvm.tir.generic.__op_priority__:
+        tvm.tir.generic.__op_priority__ = __op_priority__
+        tvm.tir.generic.add = _make_bop(_broadcast.add, tvm.tir.generic.add)
+        tvm.tir.generic.subtract = _make_bop(_broadcast.subtract, tvm.tir.generic.subtract)
+        tvm.tir.generic.multiply = _make_bop(_broadcast.multiply, tvm.tir.generic.multiply)
+        tvm.tir.generic.divide = _make_bop(_broadcast.divide, tvm.tir.generic.divide)
+        tvm.tir.generic.cast = _math.cast
 
 _bind_generic_ops()
diff --git a/topi/python/topi/intel_graphics/depthwise_conv2d.py b/topi/python/topi/intel_graphics/depthwise_conv2d.py
index 618ef50481b9c..a54941315a1a3 100644
--- a/topi/python/topi/intel_graphics/depthwise_conv2d.py
+++ b/topi/python/topi/intel_graphics/depthwise_conv2d.py
@@ -168,7 +168,7 @@ def _schedule(temp, Filter, DepthwiseConv2d):
         b, h, w, c = s[Output].op.axis
 
         # num_thread here could be 728, it is larger than cuda.max_num_threads
-        num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
+        num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value
         target = tvm.target.Target.current()
         if target and (target.target_name not in ["cuda", "nvptx"]):
             num_thread = target.max_num_threads
diff --git a/topi/python/topi/math.py b/topi/python/topi/math.py
index 0fceaadfc09c3..5b6b9ab8da754 100644
--- a/topi/python/topi/math.py
+++ b/topi/python/topi/math.py
@@ -22,7 +22,7 @@
 from . import cpp
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def identity(x):
     """Take identity of input x.
 
@@ -40,7 +40,7 @@ def identity(x):
     return te.compute(x.shape, lambda *i: x(*i))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def negative(x):
     """Take negation of input x.
 
@@ -58,7 +58,7 @@ def negative(x):
     return te.compute(x.shape, lambda *i: -x(*i))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def exp(x):
     """Take exponential of input x.
 
@@ -75,7 +75,7 @@ def exp(x):
     return te.compute(x.shape, lambda *i: te.exp(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def erf(x):
     """Take gauss error function of input x.
 
@@ -92,7 +92,7 @@ def erf(x):
     return te.compute(x.shape, lambda *i: te.erf(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def tanh(x):
     """Take hyperbolic tanh of input x.
 
@@ -109,7 +109,7 @@ def tanh(x):
     return te.compute(x.shape, lambda *i: te.tanh(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def cos(x):
     """Take cos of input x.
 
@@ -126,7 +126,7 @@ def cos(x):
     return te.compute(x.shape, lambda *i: te.cos(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def sin(x):
     """Take sin of input x.
 
@@ -143,7 +143,7 @@ def sin(x):
     return te.compute(x.shape, lambda *i: te.sin(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def atan(x):
     """Take atan of input x.
 
@@ -159,7 +159,7 @@ def atan(x):
     """
     return te.compute(x.shape, lambda *i: te.atan(x(*i)))
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def floor(x):
     """Take floor of input x.
 
@@ -176,7 +176,7 @@ def floor(x):
     return te.compute(x.shape, lambda *i: te.floor(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def ceil(x):
     """Take ceil of input x.
 
@@ -209,7 +209,7 @@ def sign(x):
     return cpp.sign(x)
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def trunc(x):
     """Take truncated value of the input of x, element-wise.
 
@@ -226,7 +226,7 @@ def trunc(x):
     return te.compute(x.shape, lambda *i: te.trunc(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def abs(x):
     """Take absolute value of the input of x, element-wise.
 
@@ -243,7 +243,7 @@ def abs(x):
     return te.compute(x.shape, lambda *i: te.abs(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def isnan(x):
     """Check if value of x is NaN, element-wise.
 
@@ -260,7 +260,7 @@ def isnan(x):
     return te.compute(x.shape, lambda *i: te.isnan(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def round(x):
     """Round elements of x to nearest integer.
 
@@ -277,7 +277,7 @@ def round(x):
     return te.compute(x.shape, lambda *i: te.round(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def log(x):
     """Take logarithm of input x.
 
@@ -294,7 +294,7 @@ def log(x):
     return te.compute(x.shape, lambda *i: te.log(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def sqrt(x):
     """Take square root of input x.
 
@@ -311,7 +311,7 @@ def sqrt(x):
     return te.compute(x.shape, lambda *i: te.sqrt(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def rsqrt(x):
     """Take inverse square root of input x.
 
@@ -328,7 +328,7 @@ def rsqrt(x):
     return te.compute(x.shape, lambda *i: te.rsqrt(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def sigmoid(x):
     """Take sigmoid tanh of input x.
 
@@ -345,7 +345,7 @@ def sigmoid(x):
     return te.compute(x.shape, lambda *i: te.sigmoid(x(*i)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def left_shift(x, n):
     """Take n bits left shift of input x.
 
@@ -364,7 +364,7 @@ def left_shift(x, n):
     return te.compute(x.shape, lambda *i: x(*i) << n)
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def right_shift(x, n):
     """Take n bits right shift of input x.
 
@@ -383,7 +383,7 @@ def right_shift(x, n):
     return te.compute(x.shape, lambda *i: x(*i) >> n)
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def clip(x, a_min, a_max):
     """Clip (limit) the values in an array. Given an interval, values
     outside the interval are clipped to the interval edges.
diff --git a/topi/python/topi/nn/dilate.py b/topi/python/topi/nn/dilate.py
index eab612df9ca14..f628fadee96e5 100644
--- a/topi/python/topi/nn/dilate.py
+++ b/topi/python/topi/nn/dilate.py
@@ -47,7 +47,7 @@ def dilate(data, strides, name="DilatedInput"):
             n, len(strides)))
 
     out_shape = tuple(
-        tvm.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
+        tvm.tir.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
 
     def _dilate(*indices):
         not_zero = []
diff --git a/topi/python/topi/nn/elemwise.py b/topi/python/topi/nn/elemwise.py
index 292dbca71b4d6..1315a48cc0ef5 100644
--- a/topi/python/topi/nn/elemwise.py
+++ b/topi/python/topi/nn/elemwise.py
@@ -21,7 +21,7 @@
 from .. import tag
 from ..util import get_const_int
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def relu(x):
     """Take relu of input x.
 
@@ -38,7 +38,7 @@ def relu(x):
     return te.compute(x.shape, lambda *i: tvm.te.max(x(*i), tvm.tir.const(0, x.dtype)))
 
 
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
 def leaky_relu(x, alpha):
     """Take leaky relu of input x.
 
@@ -61,7 +61,7 @@ def _compute(*indices):
         return tvm.tir.Select(value > 0, value, value * calpha)
     return te.compute(x.shape, _compute)
 
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
 def prelu(x, slope, axis=1):
     """ PReLU.
     It accepts two arguments: an input ``x`` and a weight array ``W``
diff --git a/topi/python/topi/nn/fifo_buffer.py b/topi/python/topi/nn/fifo_buffer.py
index 32008855bd129..de283e0de4eba 100644
--- a/topi/python/topi/nn/fifo_buffer.py
+++ b/topi/python/topi/nn/fifo_buffer.py
@@ -22,7 +22,7 @@
 from .. import tag
 from ..transform import concatenate, strided_slice
 
-@tvm.tag_scope(tag=tag.INJECTIVE+",fifo_buffer")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",fifo_buffer")
 def fifo_buffer(data, buffer, axis):
     """
     FIFO buffer to enable computation reuse in CNNs with sliding indow input
diff --git a/topi/python/topi/nn/flatten.py b/topi/python/topi/nn/flatten.py
index 63636b98c138e..11fe0d854cb26 100644
--- a/topi/python/topi/nn/flatten.py
+++ b/topi/python/topi/nn/flatten.py
@@ -20,7 +20,7 @@
 from tvm import te
 from .. import tag
 
-@tvm.tag_scope(tag=tag.INJECTIVE)
+@tvm.te.tag_scope(tag=tag.INJECTIVE)
 def flatten(data):
     """Flattens the input array into a 2-D array by collapsing the higher dimensions.
 
diff --git a/topi/python/topi/nn/mapping.py b/topi/python/topi/nn/mapping.py
index 101bbd80087c2..12558a8c33a2a 100644
--- a/topi/python/topi/nn/mapping.py
+++ b/topi/python/topi/nn/mapping.py
@@ -21,7 +21,7 @@
 from tvm import te
 from .. import tag
 
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
 def scale_shift_nchw(Input, Scale, Shift):
     """Batch normalization operator in inference.
 
@@ -44,7 +44,7 @@ def scale_shift_nchw(Input, Scale, Shift):
     return te.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift')
 
 
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
 def scale_shift_nhwc(Input, Scale, Shift):
     """Batch normalization operator in inference.
 
diff --git a/topi/python/topi/nn/pad.py b/topi/python/topi/nn/pad.py
index 6617c3aa8237a..8fe53374f2b56 100644
--- a/topi/python/topi/nn/pad.py
+++ b/topi/python/topi/nn/pad.py
@@ -21,7 +21,7 @@
 from ..util import equal_const_int
 from .. import tag
 
-@tvm.tag_scope(tag=tag.INJECTIVE+",pad")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",pad")
 def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"):
     """Pad Input with zeros.
 
@@ -56,9 +56,9 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"):
         raise ValueError("Input dimension and pad_after dismatch : %d vs %d" % (
             n, len(pad_before)))
     out_shape = tuple(
-        tvm.ir_pass.Simplify(
+        tvm.tir.ir_pass.Simplify(
             (data.shape[i] + pad_before[i] + pad_after[i])) for i in range(n))
-    pad_value = (pad_value if isinstance(pad_value, tvm.expr.PrimExpr)
+    pad_value = (pad_value if isinstance(pad_value, tvm.tir.PrimExpr)
                  else tvm.tir.const(pad_value, data.dtype))
     def _pad(*indices):
         not_zero = []
@@ -77,7 +77,7 @@ def _pad(*indices):
     return te.compute(out_shape, _pad, name=name)
 
 
-@tvm.tag_scope(tag=tag.INJECTIVE + ",pad")
+@tvm.te.tag_scope(tag=tag.INJECTIVE + ",pad")
 def mirror_pad(data,
                pad_before,
                pad_after=None,
@@ -116,7 +116,7 @@ def mirror_pad(data,
         raise ValueError("Input dimension and pad_after dismatch : %d vs %d" %
                          (n, len(pad_before)))
     out_shape = tuple(
-        tvm.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i]))
+        tvm.tir.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i]))
         for i in range(n))
     assert mode in ('SYMMETRIC', 'REFLECT')
     mode = int(mode == 'SYMMETRIC')
diff --git a/topi/python/topi/nn/softmax.py b/topi/python/topi/nn/softmax.py
index 8765a3558c366..c414372ade935 100644
--- a/topi/python/topi/nn/softmax.py
+++ b/topi/python/topi/nn/softmax.py
@@ -20,7 +20,7 @@
 import tvm
 from tvm import te
 
-@tvm.tag_scope(tag='softmax_output')
+@tvm.te.tag_scope(tag='softmax_output')
 def softmax(x, axis=-1):
     """Perform softmax activation on the data
 
@@ -78,7 +78,7 @@ def _normalize(exp, expsum, *indices):
                       name='T_softmax_norm', attrs={"axis" : axis})
 
 
-@tvm.tag_scope(tag='log_softmax_output')
+@tvm.te.tag_scope(tag='log_softmax_output')
 def log_softmax(x):
     """Perform log softmax activation on the data
 
diff --git a/topi/python/topi/nn/sparse.py b/topi/python/topi/nn/sparse.py
index c4f686e08d7df..b37bac2a213ae 100644
--- a/topi/python/topi/nn/sparse.py
+++ b/topi/python/topi/nn/sparse.py
@@ -158,7 +158,7 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr):
 
 def _csr_transpose_ir(data, indices, indptr, out_data, out_indices, out_indptr):
     """define ir for csr_transpose"""
-    irb = tvm.ir_builder.create()
+    irb = tvm.tir.ir_builder.create()
 
     data_ptr = irb.buffer_ptr(data)
     indices_ptr = irb.buffer_ptr(indices)
diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py
index dc7afec4b2cac..8dc08949505d2 100644
--- a/topi/python/topi/sparse/csrmm.py
+++ b/topi/python/topi/sparse/csrmm.py
@@ -57,7 +57,7 @@ def csrmm_default(data, indices, indptr, weight, bias=None):
     _, N = weight.shape
     def csrmm_default_ir(data, indices, indptr, weight, out):
         """define ir for csrmm"""
-        irb = tvm.ir_builder.create()
+        irb = tvm.tir.ir_builder.create()
         data_ptr = irb.buffer_ptr(data)
         indices_ptr = irb.buffer_ptr(indices)
         indptr_ptr = irb.buffer_ptr(indptr)
diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py
index 82cc0c2fa14cb..c0aa1b41449cf 100644
--- a/topi/python/topi/sparse/csrmv.py
+++ b/topi/python/topi/sparse/csrmv.py
@@ -54,7 +54,7 @@ def csrmv_default(data, indices, indptr, weight, bias=None):
     batch = indptr.shape[0]-1
     def csrmv_default_ir(data, indices, indptr, weight, out):
         """define ir for csrmv"""
-        irb = tvm.ir_builder.create()
+        irb = tvm.tir.ir_builder.create()
         data_ptr = irb.buffer_ptr(data)
         indices_ptr = irb.buffer_ptr(indices)
         indptr_ptr = irb.buffer_ptr(indptr)
diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py
index c3099b7df367b..9f01405b14f39 100644
--- a/topi/python/topi/sparse/dense.py
+++ b/topi/python/topi/sparse/dense.py
@@ -59,7 +59,7 @@ def dense_si(data, indices, indptr, weight, bias=None):
     def dense_default_ir(data, indices, indptr, weight, out):
         """Define IR for Dense"""
         dtype = data.dtype
-        irb = tvm.ir_builder.create()
+        irb = tvm.tir.ir_builder.create()
         data_ptr = irb.buffer_ptr(data)
         indices_ptr = irb.buffer_ptr(indices)
         indptr_ptr = irb.buffer_ptr(indptr)
@@ -127,7 +127,7 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None):
     def dense_default_ir(data, w_data, w_indices, w_indptr, out):
         """Define IR for Dense"""
         dtype = data.dtype
-        irb = tvm.ir_builder.create()
+        irb = tvm.tir.ir_builder.create()
         data_ptr = irb.buffer_ptr(data)
         w_data_ptr = irb.buffer_ptr(w_data)
         w_indices_ptr = irb.buffer_ptr(w_indices)
diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py
index 49c71bd3320d1..036191bc6ead6 100644
--- a/topi/python/topi/transform.py
+++ b/topi/python/topi/transform.py
@@ -158,7 +158,7 @@ def strided_slice(a, begin, end, strides=None):
         strides = []
     return cpp.strided_slice(a, begin, end, strides)
 
-@tvm.tag_scope(tag=tag.INJECTIVE+",strided_set")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",strided_set")
 def strided_set(a, v, begin, end, strides=None):
     """Set slice of an array.
 
diff --git a/topi/python/topi/util.py b/topi/python/topi/util.py
index 94c6068c7b216..681535761f83a 100644
--- a/topi/python/topi/util.py
+++ b/topi/python/topi/util.py
@@ -100,9 +100,9 @@ def get_const_int(expr):
     """
     if isinstance(expr, Integral):
         return expr
-    if not isinstance(expr, tvm.expr.IntImm):
-        expr = tvm.ir_pass.Simplify(expr)
-    if not isinstance(expr, tvm.expr.IntImm):
+    if not isinstance(expr, tvm.tir.IntImm):
+        expr = tvm.tir.ir_pass.Simplify(expr)
+    if not isinstance(expr, tvm.tir.IntImm):
         raise ValueError("Expect value to be constant int")
     return int(expr.value)
 
@@ -122,9 +122,9 @@ def get_const_float(expr):
     """
     if isinstance(expr, float):
         return float(expr)
-    if not isinstance(expr, tvm.expr.FloatImm):
-        expr = tvm.ir_pass.Simplify(expr)
-    if not isinstance(expr, tvm.expr.FloatImm):
+    if not isinstance(expr, tvm.tir.FloatImm):
+        expr = tvm.tir.ir_pass.Simplify(expr)
+    if not isinstance(expr, tvm.tir.FloatImm):
         raise ValueError("Expect value to be constant float")
     return float(expr.value)
 
@@ -144,9 +144,9 @@ def equal_const_int(expr, value):
     """
     if isinstance(expr, Integral):
         return expr == value
-    if not isinstance(expr, tvm.expr.IntImm):
-        expr = tvm.ir_pass.Simplify(expr)
-    if not isinstance(expr, tvm.expr.IntImm):
+    if not isinstance(expr, tvm.tir.IntImm):
+        expr = tvm.tir.ir_pass.Simplify(expr)
+    if not isinstance(expr, tvm.tir.IntImm):
         return False
     return expr.value == value
 
@@ -166,11 +166,11 @@ def get_const_tuple(in_tuple):
     """
     ret = []
     for elem in in_tuple:
-        if isinstance(elem, tvm.expr.Var):
+        if isinstance(elem, tvm.tir.Var):
             ret.append(elem)
-        elif not isinstance(elem, (tvm.expr.IntImm, int)):
-            elem = tvm.ir_pass.Simplify(elem)
-            if not isinstance(elem, tvm.expr.IntImm):
+        elif not isinstance(elem, (tvm.tir.IntImm, int)):
+            elem = tvm.tir.ir_pass.Simplify(elem)
+            if not isinstance(elem, tvm.tir.IntImm):
                 ret.append(elem)
         else:
             ret.append(get_const_int(elem))
@@ -206,7 +206,7 @@ def simplify(expr):
     out : Expr or int
         The simplified output
     """
-    return tvm.ir_pass.Simplify(expr) if isinstance(expr, tvm.expr.PrimExpr) else expr
+    return tvm.tir.ir_pass.Simplify(expr) if isinstance(expr, tvm.tir.PrimExpr) else expr
 
 
 def ravel_index(indices, shape):
@@ -214,7 +214,7 @@ def ravel_index(indices, shape):
 
     Parameters
     ----------
-    indices : tuple of int or tvm.expr.IntImm
+    indices : tuple of int or tvm.tir.IntImm
         The input coordinates
 
     shape : tuple of int
@@ -239,7 +239,7 @@ def unravel_index(idx, shape):
 
     Parameters
     ----------
-    idx : int or tvm.expr.IntImm
+    idx : int or tvm.tir.IntImm
         The 1D index
 
     shape : tuple of int
@@ -247,7 +247,7 @@ def unravel_index(idx, shape):
 
     Returns
     -------
-    indices : tuple of int or tvm.expr.IntImm
+    indices : tuple of int or tvm.tir.IntImm
         Corresponding coordinate of the 1D index
     """
     idxd = tvm.tir.indexdiv
@@ -283,9 +283,9 @@ def select_array(i, j):
         now = tvm.tir.const(0.0, dtype)
         for ii in range(row):
             for jj in range(col):
-                now = tvm.expr.Select(tvm.tir.all(idxm(i, row) == ii, idxm(j, col) == jj),
-                                      tvm.tir.const(matrix[ii][jj], dtype),
-                                      now)
+                now = tvm.tir.Select(tvm.tir.all(idxm(i, row) == ii, idxm(j, col) == jj),
+                                     tvm.tir.const(matrix[ii][jj], dtype),
+                                     now)
         return now
 
     return te.compute(matrix.shape, select_array, name=name)
@@ -378,12 +378,12 @@ def within_index(b, e, s, i):
         bool expression that is True is the array position would be selected
         by the index and False otherwise
     """
-    bc = tvm.expr.Select(s < 0, i <= e, i < b)
-    ec = tvm.expr.Select(s < 0, i > b, i >= e)
+    bc = tvm.tir.Select(s < 0, i <= e, i < b)
+    ec = tvm.tir.Select(s < 0, i > b, i >= e)
     ss = te.if_then_else(s < 0,
                          ((i - e) + (e % te.abs(s)) + 1) % te.abs(s),
                          (i - b) % s)
-    return tvm.expr.Select(tvm.expr.Or(bc, ec), tvm.tir.const(False), ss.equal(0))
+    return tvm.tir.Select(tvm.tir.Or(bc, ec), tvm.tir.const(False), ss.equal(0))
 
 
 def make_idx(b, e, s, z, i):
@@ -415,16 +415,16 @@ def make_idx(b, e, s, z, i):
     postion: Expr
         int expression that corresponds to an array position in the selection.
     """
-    bc = tvm.expr.Select(s < 0, i <= e, i < b)
-    ec = tvm.expr.Select(s < 0, i > b, i >= e)
+    bc = tvm.tir.Select(s < 0, i <= e, i < b)
+    ec = tvm.tir.Select(s < 0, i > b, i >= e)
 
     # Clamp to array size
-    b = tvm.expr.Select(z < b, z - 1, b)
+    b = tvm.tir.Select(z < b, z - 1, b)
 
     ss = tvm.tir.if_then_else(s < 0,
                               (b - i) // te.abs(s),
                               (i - b) // s)
-    return tvm.tir.if_then_else(tvm.expr.Or(bc, ec), 88, ss)
+    return tvm.tir.if_then_else(tvm.tir.Or(bc, ec), 88, ss)
 
 
 def is_empty_shape(shape):
diff --git a/topi/python/topi/vision/rcnn/proposal.py b/topi/python/topi/vision/rcnn/proposal.py
index 3d2c09e884e60..23bd24d22fb3b 100644
--- a/topi/python/topi/vision/rcnn/proposal.py
+++ b/topi/python/topi/vision/rcnn/proposal.py
@@ -104,7 +104,7 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r
     """
     batch, num_anchors, height, width = get_const_tuple(cls_prob_buf.shape)
     num_anchors //= 2
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
 
     p_score = ib.buffer_ptr(cls_prob_buf)
     p_delta = ib.buffer_ptr(bbox_pred_buf)
@@ -184,7 +184,7 @@ def argsort_ir(data_buf, out_index_buf):
         The result IR statement.
     """
     batch, num_bbox = get_const_tuple(data_buf.shape)
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     p_data = ib.buffer_ptr(data_buf)
     index_out = ib.buffer_ptr(out_index_buf)
     temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local")
@@ -246,7 +246,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
         return i / u
 
     batch, num_bbox = get_const_tuple(out_buf.shape)
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     p_data = ib.buffer_ptr(sorted_bbox_buf)
     p_out = ib.buffer_ptr(out_buf)
     with ib.for_range(0, batch, for_type="unroll", name="n") as b:
@@ -285,7 +285,7 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf):
     """
     batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape)
     rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch
-    ib = tvm.ir_builder.create()
+    ib = tvm.tir.ir_builder.create()
     i = ib.allocate('int32', (batch,), 'i', scope='local')
     p_sorted_bbox = ib.buffer_ptr(sorted_bbox_buf)
     p_remove = ib.buffer_ptr(remove_mask_buf)
diff --git a/topi/python/topi/x86/tensor_intrin.py b/topi/python/topi/x86/tensor_intrin.py
index f3e39c8198131..955b6b4ad280d 100644
--- a/topi/python/topi/x86/tensor_intrin.py
+++ b/topi/python/topi/x86/tensor_intrin.py
@@ -82,7 +82,7 @@ def dot_16x1x16_uint8_int8_int32_skylake():
 
     def _intrin_func(ins, outs):
         def _instr(index):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             if index == 1:
                 ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16')))
                 return ib.get()
@@ -110,8 +110,8 @@ def _instr(index):
         # body, reset, update
         return _instr(0), _instr(1), _instr(2)
 
-    with tvm.build_config(offset_factor=1, partition_const_loop=True):
-        return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+    with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+        return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
 
 
 def dot_16x1x16_uint8_int8_int16():
@@ -165,7 +165,7 @@ def dot_16x1x16_uint8_int8_int16():
 
     def _intrin_func(ins, outs):
         def _instr(index):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             if index == 1:
                 for i in range(4):
                     ib.emit(outs[0].vstore([i*32], tvm.tir.const(0, 'int16x32')))
@@ -192,8 +192,8 @@ def _instr(index):
         # body, reset, update
         return _instr(0), _instr(1), _instr(2)
 
-    with tvm.build_config(offset_factor=1, partition_const_loop=True):
-        return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+    with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+        return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
 
 
 def dot_16x1x16_uint8_int8_int32_cascadelake():
@@ -245,7 +245,7 @@ def dot_16x1x16_uint8_int8_int32_cascadelake():
 
     def _intrin_func(ins, outs):
         def _instr(index):
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             if index == 1:
                 ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16')))
                 return ib.get()
@@ -287,5 +287,5 @@ def _instr(index):
         # body, reset, update
         return _instr(0), _instr(1), _instr(2)
 
-    with tvm.build_config(offset_factor=1, partition_const_loop=True):
-        return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+    with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+        return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
diff --git a/topi/recipe/conv/depthwise_conv2d_test.py b/topi/recipe/conv/depthwise_conv2d_test.py
index 5498645f59920..a2b527356662c 100644
--- a/topi/recipe/conv/depthwise_conv2d_test.py
+++ b/topi/recipe/conv/depthwise_conv2d_test.py
@@ -129,7 +129,7 @@ def check_device(device):
         print("success")
 
     for device in ['cuda', 'opencl', 'rocm']:
-        with tvm.build_config(auto_unroll_max_step=128,
+        with tvm.target.build_config(auto_unroll_max_step=128,
                               unroll_explicit=device == 'rocm',
                               detect_global_barrier=False,
                               restricted_func=True):
@@ -218,7 +218,7 @@ def check_device(device):
         print("success")
 
     for device in ['cuda', 'opencl', 'rocm']:
-        with tvm.build_config(auto_unroll_max_step=128,
+        with tvm.target.build_config(auto_unroll_max_step=128,
                               detect_global_barrier=False,
                               restricted_func=True):
             check_device(device)
diff --git a/topi/recipe/conv/test_conv2d_hwcn_map.py b/topi/recipe/conv/test_conv2d_hwcn_map.py
index 47e1601f4487e..69bda79555a99 100644
--- a/topi/recipe/conv/test_conv2d_hwcn_map.py
+++ b/topi/recipe/conv/test_conv2d_hwcn_map.py
@@ -77,7 +77,7 @@ def check_device(device):
         w = tvm.nd.array(w_np, ctx)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
         c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
-        with tvm.build_config(auto_unroll_max_step=128,
+        with tvm.target.build_config(auto_unroll_max_step=128,
                               unroll_explicit=device == 'rocm'):
             func1 = tvm.build(s1, [A, W, B], device)
             func1(a, w, b)
diff --git a/topi/recipe/gemm/cuda_gemm_square.py b/topi/recipe/gemm/cuda_gemm_square.py
index 6e482b044a3c7..196bf72e23a33 100644
--- a/topi/recipe/gemm/cuda_gemm_square.py
+++ b/topi/recipe/gemm/cuda_gemm_square.py
@@ -146,7 +146,7 @@ def check_device(device):
         print("average time cost of %d runs = %g ms, %g GFLOPS." % (num_runs, t * 1e3, GFLOPS))
 
     for device in ["cuda", "opencl", "rocm", "nvptx", "vulkan"]:
-        with tvm.build_config(auto_unroll_max_step=128,
+        with tvm.target.build_config(auto_unroll_max_step=128,
                               unroll_explicit=(device != "cuda")):
             check_device(device)
 
diff --git a/topi/recipe/reduce/test_reduce_map.py b/topi/recipe/reduce/test_reduce_map.py
index 96f94305b5b4a..31f9bae7426c7 100644
--- a/topi/recipe/reduce/test_reduce_map.py
+++ b/topi/recipe/reduce/test_reduce_map.py
@@ -64,7 +64,7 @@ def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
     else:
         raise NotImplementedError
     s = topi.cuda.schedule_reduce(B)
-    with tvm.build_config(auto_unroll_max_step=16,
+    with tvm.target.build_config(auto_unroll_max_step=16,
                           auto_unroll_min_depth=0):
         fcuda = tvm.build(s, [A, B], "cuda", name="sum")
 
diff --git a/topi/recipe/rnn/lstm.py b/topi/recipe/rnn/lstm.py
index 172362fdda148..4076eb6a4614c 100644
--- a/topi/recipe/rnn/lstm.py
+++ b/topi/recipe/rnn/lstm.py
@@ -188,7 +188,7 @@ def check_device(target):
         print("Time cost=%g" % eval_result.mean)
 
     # set unroll_explicit for more readable code.
-    with tvm.build_config(
+    with tvm.target.build_config(
             detect_global_barrier=DETECT_GLOBAL_BARRIER,
             auto_unroll_max_step=128,
             unroll_explicit=False):
diff --git a/topi/recipe/rnn/matexp.py b/topi/recipe/rnn/matexp.py
index 94ec5bc392f1d..9991895ec8dc3 100644
--- a/topi/recipe/rnn/matexp.py
+++ b/topi/recipe/rnn/matexp.py
@@ -127,7 +127,7 @@ def rnn_matexp():
     s[SS].bind(tx, thread_x)
 
     def check_device(target):
-        with tvm.build_config(
+        with tvm.target.build_config(
                 detect_global_barrier=detect_global_barrier,
                 auto_unroll_max_step=128,
                 unroll_explicit=False):
diff --git a/tutorials/dev/low_level_custom_pass.py b/tutorials/dev/low_level_custom_pass.py
index f9b3ea3c8245c..298b24f6d046e 100644
--- a/tutorials/dev/low_level_custom_pass.py
+++ b/tutorials/dev/low_level_custom_pass.py
@@ -72,7 +72,7 @@
 #
 # IR Visitor
 # ~~~~~~~~~~
-# We can use ``tvm.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR.
+# We can use ``tvm.tir.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR.
 # ``func`` is a function callback. This function will be called before exiting the current IR node,
 # i.e. post-order visit. Then we leverage side effects to store the result of IR visit, because the
 # return value of ``func`` will be ignored.
@@ -113,7 +113,7 @@ def vectorize8(op):
         extent = op.extent.value
         name = op.loop_var.name
         lo, li = te.var(name + '.outer'), te.var(name + '.inner')
-        body = tvm.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li})
+        body = tvm.tir.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li})
         body = tvm.tir.For(li, 0, 8, tvm.tir.For.Vectorized, 0, body)
         body = tvm.tir.For(lo, 0, extent // 8, tvm.tir.For.Serial, 0, body)
         return body
@@ -122,14 +122,14 @@ def vectorize8(op):
 def vectorize(stmt):
     global loops
 
-    tvm.ir_pass.PostOrderVisit(stmt, find_width8)
+    tvm.tir.ir_pass.PostOrderVisit(stmt, find_width8)
 
     if not loops:
         return stmt
 
     # The last list arugment indicates what kinds of nodes will be transformed.
     # Thus, in this case only `For` nodes will call `vectorize8`
-    stmt = tvm.ir_pass.IRTransform(stmt, None, vectorize8, ['For'])
+    stmt = tvm.tir.ir_pass.IRTransform(stmt, None, vectorize8, ['For'])
 
     return stmt
 
@@ -159,15 +159,15 @@ def vectorize(stmt):
 # Thus, a good place to put this transformation pass is just after Phase 1.
 #
 
-with tvm.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
+with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
     print(tvm.lower(sch, [a, b, c], simple_mode=True))
 
 #####################################################################
 # Quick View
 # ----------
 # This tutorial gives a quick view of writing a customized IR transformation pass:
-# - Use ``tvm.ir_pass.PostOrderVisit`` to gather information on each IR nodes.
-# - Use ``tvm.ir_pass.IRTransform`` to transform IR nodes.
+# - Use ``tvm.tir.ir_pass.PostOrderVisit`` to gather information on each IR nodes.
+# - Use ``tvm.tir.ir_pass.IRTransform`` to transform IR nodes.
 # - Wrap up two above to write an IR-transformation function.
-# - Use ``tvm.build_config`` to put this function to TVM lowering pass
+# - Use ``tvm.target.build_config`` to put this function to TVM lowering pass
 #
diff --git a/tutorials/language/tensorize.py b/tutorials/language/tensorize.py
index d80a7ca584450..4290606268375 100644
--- a/tutorials/language/tensorize.py
+++ b/tutorials/language/tensorize.py
@@ -106,7 +106,7 @@ def intrin_gemv(m, l):
                          offset_factor=1,
                          strides=[1])
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         aa, bb = ins
         cc = outs[0]
         ib.emit(tvm.tir.call_extern("int32", "gemv_update",
@@ -115,11 +115,11 @@ def intrin_func(ins, outs):
                                 bb.access_ptr("r"),
                                 m, l, bb.strides[0]))
         return ib.get()
-    with tvm.build_config(offset_factor=1):
-        return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
+    with tvm.target.build_config(offset_factor=1):
+        return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
 
 ######################################################################
-# Here :code:`tvm.decl_tensor_intrin` declares how to execute the computation :code:`c.op`.
+# Here :code:`te.decl_tensor_intrin` declares how to execute the computation :code:`c.op`.
 # Our implementation simply takes the inputs and outputs,
 # converts them to pointers and emit an external function call.
 # Note that tensorization requires user to specify :code:`offset_factor`,
@@ -255,7 +255,7 @@ def intrin_func(ins, outs):
         aa, bb = ins
         cc = outs[0]
         def _body():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_extern("int32", "gemv_update",
                                     cc.access_ptr("w"),
                                     aa.access_ptr("r"),
@@ -263,14 +263,14 @@ def _body():
                                     m, l, bb.strides[0]))
             return ib.get()
         def _reduce_reset():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m))
             return ib.get()
         def _reduce_update():
             return _body()
         return _body(), _reduce_reset(), _reduce_update()
-    with tvm.build_config(offset_factor=1):
-        return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
+    with tvm.target.build_config(offset_factor=1):
+        return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
 
 ######################################################################
 # Note that :code:`intrin_func` now returns a triplet:
diff --git a/tutorials/optimize/opt_conv_tensorcore.py b/tutorials/optimize/opt_conv_tensorcore.py
index 2fa4fd7ef33b5..44b9de3b99ffe 100644
--- a/tutorials/optimize/opt_conv_tensorcore.py
+++ b/tutorials/optimize/opt_conv_tensorcore.py
@@ -159,7 +159,7 @@ def intrin_wmma_load_matrix(scope):
     BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256)
 
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
 
         BA = ins[0]
         BC = outs[0]
@@ -168,7 +168,7 @@ def intrin_func(ins, outs):
                                 BA.access_ptr('r'), n, 'row_major'))
         return ib.get()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
 
 
 def intrin_wmma_gemm():
@@ -189,12 +189,12 @@ def intrin_func(ins, outs):
         BC, = outs
 
         def init():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
             return ib.get()
 
         def update():
-            ib = tvm.ir_builder.create()
+            ib = tvm.tir.ir_builder.create()
             ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
                                     BC.data, BC.elem_offset // 256,
                                     BA.data, BA.elem_offset // 256,
@@ -204,7 +204,7 @@ def update():
 
         return update(), init(), update()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
 
 
 def intrin_wmma_store_matrix():
@@ -215,7 +215,7 @@ def intrin_wmma_store_matrix():
     BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256)
 
     def intrin_func(ins, outs):
-        ib = tvm.ir_builder.create()
+        ib = tvm.tir.ir_builder.create()
         BA = ins[0]
         BC = outs[0]
         ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync',
@@ -223,7 +223,7 @@ def intrin_func(ins, outs):
                                 BC.access_ptr('w'), n, 'row_major'))
         return ib.get()
 
-    return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+    return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
 
 ###############################################################################
 # Scheduling the Computation
@@ -331,7 +331,7 @@ def intrin_func(ins, outs):
 
 ctx = tvm.gpu(0)
 if nvcc.have_tensorcore(ctx.compute_version):
-    with tvm.build_config(auto_unroll_max_step=16):
+    with tvm.target.build_config(auto_unroll_max_step=16):
         func = tvm.build(s, [A, W, Conv], 'cuda')
     a_np = np.random.uniform(size=data_shape).astype(A.dtype)
     w_np = np.random.uniform(size=kernel_shape).astype(W.dtype)
diff --git a/tutorials/optimize/opt_matmul_auto_tensorcore.py b/tutorials/optimize/opt_matmul_auto_tensorcore.py
index ce983fba660ef..490ccdb9373aa 100644
--- a/tutorials/optimize/opt_matmul_auto_tensorcore.py
+++ b/tutorials/optimize/opt_matmul_auto_tensorcore.py
@@ -287,7 +287,7 @@ def tune_and_evaluate(M, N, L, dtype, layout):
   print(best_config)
   with autotvm.apply_history_best('matmul.log'):
     with tvm.target.create("cuda"):
-        with tvm.build_config():
+        with tvm.target.build_config():
             s, arg_bufs = test_gemm(N, L, M, dtype, layout)
             print(tvm.lower(s, arg_bufs, simple_mode=True))
             func = tvm.build(s, arg_bufs)
diff --git a/vta/python/vta/build_module.py b/vta/python/vta/build_module.py
index 8633a9b07156c..4c33d36d69b56 100644
--- a/vta/python/vta/build_module.py
+++ b/vta/python/vta/build_module.py
@@ -24,13 +24,13 @@
 def lift_coproc_scope(x):
     """Lift coprocessings cope to the """
     x = ir_pass.lift_alloc_to_scope_begin(x)
-    x = tvm.ir_pass.LiftAttrScope(x, "coproc_scope", False)
+    x = tvm.tir.ir_pass.LiftAttrScope(x, "coproc_scope", False)
     return x
 
 def early_rewrite(stmt):
     """Try to do storage rewrite in early pass."""
     try:
-        return tvm.ir_pass.StorageRewrite(stmt)
+        return tvm.tir.ir_pass.StorageRewrite(stmt)
     except tvm.error.TVMError:
         return stmt
 
@@ -71,17 +71,17 @@ def add_debug(stmt):
                  (1, ir_pass.inject_dma_intrin),
                  (1, ir_pass.inject_skip_copy),
                  (1, ir_pass.annotate_alu_coproc_scope),
-                 (1, lambda x: tvm.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)),
+                 (1, lambda x: tvm.tir.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)),
                  (1, lift_coproc_scope),
                  (1, ir_pass.inject_coproc_sync),
                  (1, early_rewrite)]
     if debug_flag:
         pass_list.append((1, add_debug))
     pass_list.append((2, ir_pass.inject_alu_intrin))
-    pass_list.append((3, tvm.ir_pass.LowerStorageAccessInfo))
+    pass_list.append((3, tvm.tir.ir_pass.LowerStorageAccessInfo))
     pass_list.append((3, ir_pass.fold_uop_loop))
     pass_list.append((3, ir_pass.cpu_access_rewrite))
-    return tvm.build_config(add_lower_pass=pass_list, **kwargs)
+    return tvm.target.build_config(add_lower_pass=pass_list, **kwargs)
 
 
 def lower(*args, **kwargs):
diff --git a/vta/python/vta/intrin.py b/vta/python/vta/intrin.py
index b1ed7a13fa622..8532ffa318b56 100644
--- a/vta/python/vta/intrin.py
+++ b/vta/python/vta/intrin.py
@@ -75,7 +75,7 @@ def intrin_func(ins, outs):
         dout = outs[0]
         def instr(index):
             """Generate matrix-matrix multiply VTA instruction"""
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             dev = env.dev
             irb.scope_attr(dev.vta_axis, "coproc_scope",
                            dev.get_task_qid(dev.QID_COMPUTE))
@@ -104,8 +104,8 @@ def instr(index):
             return (nop, nop, nop)
         return (instr(0), instr(1), instr(2))
 
-    return tvm.decl_tensor_intrin(out.op, intrin_func,
-                                  name="GEMM",
-                                  binds={inp: inp_layout,
-                                         wgt: wgt_layout,
-                                         out: out_layout})
+    return te.decl_tensor_intrin(out.op, intrin_func,
+                                 name="GEMM",
+                                 binds={inp: inp_layout,
+                                        wgt: wgt_layout,
+                                        out: out_layout})
diff --git a/vta/python/vta/ir_pass.py b/vta/python/vta/ir_pass.py
index 723558357e6aa..4f8deff285a64 100644
--- a/vta/python/vta/ir_pass.py
+++ b/vta/python/vta/ir_pass.py
@@ -83,7 +83,7 @@ def _post_order(op):
                         fail[0] = True
                         return op
                     if gemm_offsets[i] is not None:
-                        if not tvm.ir_pass.Equal(m[0], gemm_offsets[i]):
+                        if not tvm.tir.ir_pass.Equal(m[0], gemm_offsets[i]):
                             fail[0] = True
                             return op
                         args.append(m[1])
@@ -96,14 +96,14 @@ def _post_order(op):
                 raise RuntimeError("unexpected op %s" % op)
             return op
 
-        ret = tvm.ir_pass.IRTransform(
+        ret = tvm.tir.ir_pass.IRTransform(
             stmt.body, None, _post_order, ["Call"])
 
         if not fail[0] and all(x is not None for x in gemm_offsets):
             def _visit(op):
                 if op.same_as(loop_var):
                     fail[0] = True
-            tvm.ir_pass.PostOrderVisit(ret, _visit)
+            tvm.tir.ir_pass.PostOrderVisit(ret, _visit)
             if not fail[0]:
                 begin = tvm.tir.call_extern(
                     "int32", "VTAUopLoopBegin", stmt.extent, *gemm_offsets)
@@ -138,7 +138,7 @@ def _do_fold(stmt):
             return tvm.tir.AttrStmt(
                 stmt.node, stmt.attr_key, stmt.value, body)
         return None
-    out = tvm.ir_pass.IRTransform(
+    out = tvm.tir.ir_pass.IRTransform(
         stmt_in, _do_fold, None, ["AttrStmt"])
     return out
 
@@ -194,7 +194,7 @@ def _post_order(op):
             new_var = rw_info[buffer_var]
             return tvm.tir.Store(new_var, op.value, op.index)
         raise RuntimeError("not reached")
-    stmt = tvm.ir_pass.IRTransform(
+    stmt = tvm.tir.ir_pass.IRTransform(
         stmt_in, None, _post_order, ["Allocate", "Load", "Store"])
     for buffer_var, new_var in rw_info.items():
         stmt = tvm.tir.LetStmt(
@@ -260,7 +260,7 @@ def _post_order(op):
         if isinstance(op, tvm.tir.For):
             return _merge_block(lift_stmt.pop() + [op], op.body)
         raise RuntimeError("not reached")
-    stmt = tvm.ir_pass.IRTransform(
+    stmt = tvm.tir.ir_pass.IRTransform(
         stmt_in, _pre_order, _post_order, ["Allocate", "AttrStmt", "For"])
     assert len(lift_stmt) == 1
     return _merge_block(lift_stmt[0], stmt)
@@ -283,7 +283,7 @@ def _do_fold(stmt):
         if _match_pragma(stmt, "skip_dma_copy"):
             return tvm.tir.Evaluate(0)
         return None
-    return tvm.ir_pass.IRTransform(
+    return tvm.tir.ir_pass.IRTransform(
         stmt_in, _do_fold, None, ["AttrStmt"])
 
 
@@ -314,9 +314,9 @@ def _do_fold(stmt):
                 op.loop_var, op.min, 2, op.for_type,
                 op.device_api, op.body)
         return None
-    stmt = tvm.ir_pass.IRTransform(
+    stmt = tvm.tir.ir_pass.IRTransform(
         stmt_in, None, _do_fold, ["AttrStmt"])
-    stmt = tvm.ir_pass.CoProcSync(stmt)
+    stmt = tvm.tir.ir_pass.CoProcSync(stmt)
     return stmt
 
 
@@ -381,7 +381,7 @@ def _fold_buffer_dim(buf, scope, elem_block):
                     break
                 x_size = x_size * buf.shape[k]
                 next_base = i + 1
-            shape.append(tvm.ir_pass.Simplify(x_size))
+            shape.append(tvm.tir.ir_pass.Simplify(x_size))
             strides.append(x_stride)
             assert next_base != base
             base = next_base
@@ -492,7 +492,7 @@ def _inject_copy(src, dst, pad_before, pad_after, pad_value):
             _check_compact(src)
             x_size, y_size, x_stride, offset = _get_2d_pattern(
                 dst, elem_width, elem_bytes, data_type, src.scope, allow_fold=True)
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             irb.scope_attr(env.dev.vta_axis, "coproc_scope",
                            env.dev.get_task_qid(task_qid))
             irb.emit(tvm.tir.call_extern(
@@ -562,7 +562,7 @@ def _inject_copy(src, dst, pad_before, pad_after, pad_value):
                 src, elem_width, elem_bytes, data_type,
                 dst.scope, allow_fold=allow_fold)
 
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             irb.scope_attr(env.dev.vta_axis, "coproc_scope",
                            env.dev.get_task_qid(task_qid))
 
@@ -578,7 +578,7 @@ def _inject_copy(src, dst, pad_before, pad_after, pad_value):
         else:
             raise RuntimeError("Do not support copy %s->%s" % (src.scope, dst.scope))
 
-    return tvm.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy)
+    return tvm.tir.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy)
 
 
 def _get_gemm_intrin_buffer():
@@ -649,11 +649,11 @@ def _find_basics(op):
     def _do_fold(op):
         if _match_pragma(op, "conv2d_transpose_gemm"):
             is_init = ".init" in str(op)
-            tvm.ir_pass.PostOrderVisit(op, _find_basics)
+            tvm.tir.ir_pass.PostOrderVisit(op, _find_basics)
 
             if is_init:
                 # create inner most block
-                irb = tvm.ir_builder.create()
+                irb = tvm.tir.ir_builder.create()
                 dev = env.dev
                 irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE))
                 irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop)
@@ -665,7 +665,7 @@ def _do_fold(op):
                 inner = irb.get()
                 # TODO(@tmoreau89): This is only a temporary fix, please take a look.
                 body = op.body.body
-                while isinstance(body, tvm.stmt.IfThenElse):
+                while isinstance(body, tvm.tir.IfThenElse):
                     body = body.then_case
                 args = body.args
                 res_tensor = body.func.output(0)
@@ -686,7 +686,7 @@ def _do_fold(op):
                     condition = tvm.tir.const(1, 'int')
 
                 # create inner most block
-                irb = tvm.ir_builder.create()
+                irb = tvm.tir.ir_builder.create()
                 with irb.if_scope(condition):
                     dev = env.dev
                     irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE))
@@ -719,7 +719,7 @@ def _do_fold(op):
                     tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner)
                 return inner
         return None
-    ret = tvm.ir_pass.IRTransform(
+    ret = tvm.tir.ir_pass.IRTransform(
         stmt_in, _do_fold, None, ["AttrStmt"])
     return ret
 
@@ -740,7 +740,7 @@ def annotate_alu_coproc_scope(stmt_in):
     env = get_env()
     def _do_fold(stmt):
         if _match_pragma(stmt, "alu"):
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             irb.scope_attr(env.dev.vta_axis, "coproc_scope",
                            env.dev.get_task_qid(env.dev.QID_COMPUTE))
             irb.scope_attr(env.dev.vta_axis, "coproc_uop_scope",
@@ -751,7 +751,7 @@ def _do_fold(stmt):
             return tvm.tir.Evaluate(0)
         return stmt
 
-    stmt_out = tvm.ir_pass.IRTransform(
+    stmt_out = tvm.tir.ir_pass.IRTransform(
         stmt_in, None, _do_fold, ["AttrStmt"])
 
     return stmt_out
@@ -775,7 +775,7 @@ def inject_alu_intrin(stmt_in):
 
     def _do_fold(stmt):
         def _equal(x, y):
-            return tvm.ir_pass.Equal(tvm.ir_pass.Simplify(x - y), 0)
+            return tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(x - y), 0)
 
         def _flatten_loop(src_coeff, dst_coeff, extents):
             src_coeff = list(src_coeff)
@@ -794,7 +794,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
                 next_ext = extents.pop()
 
                 if _equal(next_src, vsrc * vext) and _equal(next_dst, vdst * vext):
-                    vext = tvm.ir_pass.Simplify(vext * next_ext)
+                    vext = tvm.tir.ir_pass.Simplify(vext * next_ext)
                 else:
                     rev_src_coeff.append(vsrc)
                     rev_dst_coeff.append(vdst)
@@ -854,7 +854,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
                 if loop_body.value.name == 'shift_left':
                     alu_opcode = env.dev.ALU_OPCODE_SHR
                     lhs = loop_body.value.args[0]
-                    rhs = tvm.ir_pass.Simplify(-loop_body.value.args[1])
+                    rhs = tvm.tir.ir_pass.Simplify(-loop_body.value.args[1])
                 elif loop_body.value.name == 'shift_right':
                     alu_opcode = env.dev.ALU_OPCODE_SHR
                     lhs = loop_body.value.args[0]
@@ -895,9 +895,9 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
                 lhs_equal = True
                 rhs_equal = True
                 for i, coef in enumerate(dst_coeff):
-                    if not tvm.ir_pass.Equal(coef, src_lhs_coeff[i]):
+                    if not tvm.tir.ir_pass.Equal(coef, src_lhs_coeff[i]):
                         lhs_equal = False
-                    if not tvm.ir_pass.Equal(coef, src_rhs_coeff[i]):
+                    if not tvm.tir.ir_pass.Equal(coef, src_rhs_coeff[i]):
                         rhs_equal = False
                 # Make sure at least one of the source is identical to the
                 # destination (in-place computation)
@@ -916,20 +916,20 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
             assert len(src_coeff) > 1
             assert len(dst_coeff) > 1
             assert len(extents) != 0
-            assert tvm.ir_pass.Equal(
-                tvm.ir_pass.Simplify(
+            assert tvm.tir.ir_pass.Equal(
+                tvm.tir.ir_pass.Simplify(
                     idxm(src_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0)
-            assert tvm.ir_pass.Equal(
-                tvm.ir_pass.Simplify(
+            assert tvm.tir.ir_pass.Equal(
+                tvm.tir.ir_pass.Simplify(
                     idxm(dst_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0)
-            assert tvm.ir_pass.Equal(src_coeff[-2], 1)
-            assert tvm.ir_pass.Equal(dst_coeff[-2], 1)
+            assert tvm.tir.ir_pass.Equal(src_coeff[-2], 1)
+            assert tvm.tir.ir_pass.Equal(dst_coeff[-2], 1)
             if env.BATCH > 1:
                 assert len(src_coeff) > 2
                 assert len(dst_coeff) > 2
                 assert len(extents) > 1
-                assert tvm.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT)
-                assert tvm.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT)
+                assert tvm.tir.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT)
+                assert tvm.tir.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT)
 
             # Apply tensorization of the loop coefficients
             src_offset = src_coeff[-1]
@@ -945,16 +945,16 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
             src_coeff.append(src_offset)
             dst_coeff.append(dst_offset)
             src_coeff = [
-                tvm.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in src_coeff]
+                tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in src_coeff]
             dst_coeff = [
-                tvm.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff]
+                tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff]
 
             # Flatten the outer loops
             if extents:
                 src_coeff, dst_coeff, extents = _flatten_loop(src_coeff, dst_coeff, extents)
 
             # Insert ALU micro-ops
-            irb = tvm.ir_builder.create()
+            irb = tvm.tir.ir_builder.create()
             for idx, extent in enumerate(extents):
                 irb.emit(tvm.tir.call_extern(
                     "int32", "VTAUopLoopBegin",
@@ -973,7 +973,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents):
             return irb.get()
         return stmt
 
-    stmt_out = tvm.ir_pass.IRTransform(
+    stmt_out = tvm.tir.ir_pass.IRTransform(
         stmt_in, None, _do_fold, ["AttrStmt"])
     return stmt_out
 
diff --git a/vta/python/vta/top/op.py b/vta/python/vta/top/op.py
index fe89341d9df80..2198ed4c191f7 100644
--- a/vta/python/vta/top/op.py
+++ b/vta/python/vta/top/op.py
@@ -45,7 +45,7 @@ def compute_clip_vta(attrs, inputs, output_type):
     a_max = attrs.a_max
     const_min = tvm.tir.const(a_min, x.dtype)
     const_max = tvm.tir.const(a_max, x.dtype)
-    with tvm.tag_scope(topi.tag.ELEMWISE):
+    with tvm.te.tag_scope(topi.tag.ELEMWISE):
         x = te.compute(
             x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
         x = te.compute(
diff --git a/vta/scripts/tune_conv2d.py b/vta/scripts/tune_conv2d.py
index ff02485b515bd..6d0b5d435b3ba 100644
--- a/vta/scripts/tune_conv2d.py
+++ b/vta/scripts/tune_conv2d.py
@@ -49,7 +49,7 @@
     ('resnet-18.C11', Workload(env.BATCH,   7,   7, 512, 512, 3, 3, 1, 1, 1, 1)),
 ]
 
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/scripts/tune_conv2d_transpose.py b/vta/scripts/tune_conv2d_transpose.py
index f09ba4d2566f7..087136797c5ac 100644
--- a/vta/scripts/tune_conv2d_transpose.py
+++ b/vta/scripts/tune_conv2d_transpose.py
@@ -42,7 +42,7 @@
     ('DCGAN.CT3', Workload(env.BATCH, 16, 16,  256, 128, 4, 4, 1, 1, 2, 2)),
 ]
 
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/scripts/tune_dense.py b/vta/scripts/tune_dense.py
index d738c99965bd2..e54de1d4ea70b 100644
--- a/vta/scripts/tune_dense.py
+++ b/vta/scripts/tune_dense.py
@@ -38,7 +38,7 @@
     ('lstm.dense.4',  Workload(4, 256, 128)),
 ]
 
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/scripts/tune_group_conv2d.py b/vta/scripts/tune_group_conv2d.py
index 1119d273a6b9d..72f9525320eff 100644
--- a/vta/scripts/tune_group_conv2d.py
+++ b/vta/scripts/tune_group_conv2d.py
@@ -47,7 +47,7 @@
     ('mobilenet.D9', Workload(env.BATCH,   7,  7,  1024, 1024, 64, 3, 3, 1, 1, 1, 1)),
 ]
 
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
index 10af0ab079878..1de35c0242032 100644
--- a/vta/scripts/tune_resnet.py
+++ b/vta/scripts/tune_resnet.py
@@ -61,7 +61,7 @@ def parse_arguments():
 def register_vta_tuning_tasks():
     from tvm.autotvm.task.topi_integration import TaskExtractEnv, deserialize_args
 
-    @tvm.tag_scope(tag=topi.tag.ELEMWISE)
+    @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
     def my_clip(x, a_min, a_max):
         """Unlike topi's current clip, put min and max into two stages."""
         const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_conv2d.py
index dcffed9993529..b3c36e85d56b1 100644
--- a/vta/tests/python/integration/test_benchmark_topi_conv2d.py
+++ b/vta/tests/python/integration/test_benchmark_topi_conv2d.py
@@ -62,7 +62,7 @@
 ]
 
 # FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
index 45a601742dce0..90cc21fc8405f 100644
--- a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
+++ b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py
@@ -54,7 +54,7 @@
 ]
 
 # FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/tests/python/integration/test_benchmark_topi_dense.py b/vta/tests/python/integration/test_benchmark_topi_dense.py
index f9451f5f77f00..95c491a6d7239 100644
--- a/vta/tests/python/integration/test_benchmark_topi_dense.py
+++ b/vta/tests/python/integration/test_benchmark_topi_dense.py
@@ -36,7 +36,7 @@
 from vta.testing import simulator
 
 # FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py
index 3670eb4ec6738..1d5838ce8cda8 100644
--- a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py
+++ b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py
@@ -58,7 +58,7 @@
 ]
 
 # FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
 def my_clip(x, a_min, a_max):
     """Unlike topi's current clip, put min and max into two stages."""
     const_min = tvm.tir.const(a_min, x.dtype)
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py
index 0aa6343d01c4d..16c8b3e1ad884 100644
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ b/vta/tutorials/autotvm/tune_relay_vta.py
@@ -298,7 +298,7 @@ def tune_tasks(tasks,
 def register_vta_tuning_tasks():
     from tvm.autotvm.task import TaskExtractEnv
 
-    @tvm.tag_scope(tag=topi.tag.ELEMWISE)
+    @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
     def my_clip(x, a_min, a_max):
         """Unlike topi's current clip, put min and max into two stages."""
         const_min = tvm.tir.const(a_min, x.dtype)