
Commit 506386d

Merge branch 'apache:main' into generic-attention
2 parents 7569674 + a531d17 commit 506386d

File tree: 2,143 files changed (+3660 / -521447 lines)


CMakeLists.txt

Lines changed: 13 additions & 98 deletions
@@ -57,20 +57,13 @@ tvm_option(USE_THREADS "Build with thread support" ON)
 tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF)
 tvm_option(USE_MLIR "Build with MLIR support" OFF)
 tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
-tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
-tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
-tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
-tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
 tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
-tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
 tvm_option(TVM_DEBUG_WITH_ABI_CHANGE "Enable debug code that may cause ABI changes" OFF)
 tvm_option(TVM_LOG_BEFORE_THROW "Whether log before throw, for debugging purposes" OFF)
 tvm_option(USE_RTTI "Build with RTTI" ON)
 tvm_option(USE_MSVC_MT "Build with MT" OFF)
 tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF)
 tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF)
-tvm_option(USE_TF_TVMDSOOP "Build with TensorFlow TVMDSOOp" OFF)
-tvm_option(USE_PT_TVMDSOOP "Build with PyTorch TVMDSOOp" OFF)
 tvm_option(USE_FALLBACK_STL_MAP "Use TVM's POD compatible Map" OFF)
 tvm_option(INDEX_DEFAULT_I64 "Defaults the index datatype to int64" ON)
 tvm_option(USE_LIBBACKTRACE "Use libbacktrace to supply linenumbers on stack traces" AUTO)
@@ -335,34 +328,6 @@ tvm_file_glob(GLOB CODEGEN_SRCS

 list(APPEND COMPILER_SRCS ${CODEGEN_SRCS})

-tvm_file_glob(GLOB_RECURSE RELAY_OP_SRCS
-    src/relay/op/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_PASS_SRCS
-    src/relay/analysis/*.cc
-    src/relay/collage/*.cc
-    src/relay/transforms/*.cc
-    src/relay/quantize/*.cc
-    )
-tvm_file_glob(GLOB RELAY_BACKEND_SRCS
-    src/relay/backend/*.cc
-    src/relay/backend/vm/*.cc
-    src/relay/backend/aot/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_IR_SRCS
-    src/relay/ir/*.cc
-    src/relay/printer/*.cc
-    src/relay/parser/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_QNN_SRCS
-    src/relay/qnn/*.cc
-    )
-list(APPEND COMPILER_SRCS ${RELAY_OP_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_PASS_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS})
-
 tvm_file_glob(GLOB DATATYPE_SRCS src/target/datatype/*.cc)
 list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
 list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc")
@@ -418,51 +383,6 @@ else()
 list(APPEND COMPILER_SRCS ${STACKVM_RUNTIME_SRCS})
 endif(USE_STACKVM_RUNTIME)

-# NOTE(areusch): USE_GRAPH_RUNTIME will be deleted in a future release
-if(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
-  message(WARNING "USE_GRAPH_RUNTIME renamed to USE_GRAPH_EXECUTOR. Please update your config.cmake")
-  set(USE_GRAPH_EXECUTOR ${USE_GRAPH_RUNTIME})
-  unset(USE_GRAPH_RUNTIME CACHE)
-endif(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
-
-# NOTE(areusch): USE_GRAPH_RUNTIME_DEBUG will be deleted in a future release
-if(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_PROFILER)
-  message(WARNING "USE_GRAPH_RUNTIME_DEBUG renamed to USE_PROFILER. Please update your config.cmake")
-  set(USE_PROFILER ${USE_GRAPH_RUNTIME_DEBUG})
-  unset(USE_GRAPH_RUNTIME_DEBUG CACHE)
-endif(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_PROFILER)
-
-if(USE_GRAPH_EXECUTOR)
-  message(STATUS "Build with Graph Executor support...")
-  tvm_file_glob(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS})
-
-endif(USE_GRAPH_EXECUTOR)
-
-# convert old options for profiler
-if(USE_GRAPH_EXECUTOR_DEBUG)
-  message(WARNING "USE_GRAPH_EXECUTOR_DEBUG renamed to USE_PROFILER. Please update your config.cmake")
-  unset(USE_GRAPH_EXECUTOR_DEBUG CACHE)
-  set(USE_PROFILER ON)
-endif()
-if(USE_VM_PROFILER)
-  message(WARNING "USE_VM_PROFILER renamed to USE_PROFILER. Please update your config.cmake")
-  unset(USE_VM_PROFILER CACHE)
-  set(USE_PROFILER ON)
-endif()
-
-if(USE_PROFILER)
-  message(STATUS "Build with profiler...")
-
-  tvm_file_glob(GLOB RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS src/runtime/graph_executor/debug/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS})
-  set_source_files_properties(${RUNTIME_GRAPH_EXECUTOR_SRCS}
-    PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_EXECUTOR_DEBUG")
-
-  tvm_file_glob(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
-endif(USE_PROFILER)
-
 if(USE_CUDA AND USE_NCCL)
   message(STATUS "Build with NCCL...")
   find_nccl(${USE_NCCL})
@@ -493,13 +413,6 @@ if(USE_ROCM AND USE_RCCL)
   list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC})
 endif()

-if(USE_AOT_EXECUTOR)
-  message(STATUS "Build with AOT Executor support...")
-  file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})
-
-endif(USE_AOT_EXECUTOR)
-
 # Enable ctest if gtest is available
 if(USE_GTEST)
   # Check env var for backward compatibility. A better way to specify package
@@ -538,12 +451,6 @@ if(USE_GTEST)
   endif()
 endif()

-if(USE_PIPELINE_EXECUTOR)
-  message(STATUS "Build with Pipeline Executor support...")
-  tvm_file_glob(GLOB RUNTIME_PIPELINE_SRCS src/runtime/pipeline/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_PIPELINE_SRCS})
-endif(USE_PIPELINE_EXECUTOR)
-
 if(USE_KALLOC_ALIGNMENT)
   message(STATUS "Build Alloc alignment set to ${USE_KALLOC_ALIGNMENT}")
   add_definitions(-DTVM_KALLOC_ALIGNMENT=${USE_KALLOC_ALIGNMENT})
@@ -576,7 +483,6 @@ include(cmake/modules/Metal.cmake)
 include(cmake/modules/ROCM.cmake)
 include(cmake/modules/LLVM.cmake)
 include(cmake/modules/contrib/BLAS.cmake)
-include(cmake/modules/contrib/CODEGENC.cmake)
 include(cmake/modules/contrib/DNNL.cmake)
 include(cmake/modules/contrib/AMX.cmake)
 include(cmake/modules/contrib/CUTLASS.cmake)
@@ -587,10 +493,7 @@ include(cmake/modules/contrib/MSCCLPP.cmake)
 include(cmake/modules/contrib/Sort.cmake)
 include(cmake/modules/contrib/NNPack.cmake)
 include(cmake/modules/contrib/LibTorch.cmake)
-include(cmake/modules/contrib/HybridDump.cmake)
 include(cmake/modules/contrib/TFLite.cmake)
-include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
-include(cmake/modules/contrib/PT_TVMDSOOP.cmake)
 include(cmake/modules/contrib/CoreML.cmake)
 include(cmake/modules/contrib/BNNS.cmake)
 include(cmake/modules/contrib/ONNX.cmake)
@@ -604,7 +507,6 @@ include(cmake/modules/contrib/MSC.cmake)
 include(cmake/modules/contrib/vllm.cmake)
 include(cmake/modules/Git.cmake)
 include(cmake/modules/LibInfo.cmake)
-include(cmake/modules/RustExt.cmake)
 include(cmake/modules/contrib/Mrvl.cmake)

 set(LIBINFO_FILE ${CMAKE_CURRENT_LIST_DIR}/src/support/libinfo.cc)
@@ -819,6 +721,19 @@ endif()
 # Custom targets
 add_custom_target(runtime DEPENDS tvm_runtime)

+# By default add cython to all build
+find_package(Python)
+if(NOT DEFINED ENV{CONDA_BUILD})
+  message(STATUS ${CMAKE_CURRENT_BINARY_DIR})
+  add_custom_target(
+    tvm_cython ALL
+    ${Python_EXECUTABLE} setup.py build_ext --inplace
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python
+  )
+  add_dependencies(tvm_cython tvm)
+  message("Add Cython build into the default build step")
+endif()
+
 # Installation rules
 install(TARGETS tvm EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
 install(TARGETS tvm_runtime EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
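
The new `tvm_cython` target in the last hunk simply shells out to the Python FFI build in `python/`. A minimal sketch of the equivalent manual step (an assumption, not part of the commit; it presumes you run it from the TVM source root) would be:

# Hypothetical manual equivalent of the tvm_cython custom target:
# build the Cython FFI extension in-place under python/.
import pathlib
import subprocess
import sys

tvm_root = pathlib.Path(".")  # assumption: current directory is the TVM source root
subprocess.check_call(
    [sys.executable, "setup.py", "build_ext", "--inplace"],
    cwd=tvm_root / "python",
)

When the extension is not built (for example under conda builds, where CONDA_BUILD is set and the target is skipped), the TVM Python package falls back to its ctypes FFI.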

Makefile

Lines changed: 1 addition & 2 deletions
@@ -21,7 +21,6 @@
 lint pylint cpplint \
 cppdoc docs \
 web webclean \
-cython cython3 cyclean \
 clean

 .SECONDEXPANSION:
@@ -128,7 +127,7 @@ webclean:

 # JVM build rules
 INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include
-PKG_CFLAGS = -std=c++11 -Wall -O2 $(INCLUDE_FLAGS) -fPIC
+PKG_CFLAGS = -Wall -O3 $(INCLUDE_FLAGS) -fPIC
 PKG_LDFLAGS =

 ifeq ($(OS),Windows_NT)

README.md

Lines changed: 13 additions & 13 deletions
@@ -23,8 +23,8 @@
 [Release Notes](NEWS.md)

 Apache TVM is a compiler stack for deep learning systems. It is designed to close the gap between the
-productivity-focused deep learning frameworks, and the performance- and efficiency-focused hardware backends.
-TVM works with deep learning frameworks to provide end to end compilation to different backends.
+productivity-focused deep learning frameworks and the performance- and efficiency-focused hardware backends.
+TVM works with deep learning frameworks to provide end-to-end compilation for different backends.

 License
 -------
@@ -33,31 +33,31 @@ TVM is licensed under the [Apache-2.0](LICENSE) license.
 Getting Started
 ---------------
 Check out the [TVM Documentation](https://tvm.apache.org/docs/) site for installation instructions, tutorials, examples, and more.
-The [Getting Started with TVM](https://tvm.apache.org/docs/tutorial/introduction.html) tutorial is a great
+The [Getting Started with TVM](https://tvm.apache.org/docs/get_started/overview.html) tutorial is a great
 place to start.

 Contribute to TVM
 -----------------
-TVM adopts apache committer model, we aim to create an open source project that is maintained and owned by the community.
+TVM adopts the Apache committer model. We aim to create an open-source project maintained and owned by the community.
 Check out the [Contributor Guide](https://tvm.apache.org/docs/contribute/).

 History and Acknowledgement
 ---------------------------
-TVM started as a research project for deep learning compiler.
-The first version of the project benefited a lot from following projects:
+TVM started as a research project for deep learning compilation.
+The first version of the project benefited a lot from the following projects:

 - [Halide](https://github.com/halide/Halide): Part of TVM's TIR and arithmetic simplification module
-  originates from Halide. We also learned and adapted some part of lowering pipeline from Halide.
+  originates from Halide. We also learned and adapted some parts of the lowering pipeline from Halide.
 - [Loopy](https://github.com/inducer/loopy): use of integer set analysis and its loop transformation primitives.
 - [Theano](https://github.com/Theano/Theano): the design inspiration of symbolic scan operator for recurrence.

 Since then, the project has gone through several rounds of redesigns.
 The current design is also drastically different from the initial design, following the
-development trend of ML compiler community.
+development trend of the ML compiler community.

-The most recent version focuses on a cross-level design with TensorIR as tensor-level representation
-and Relax as graph level representation, and python-first transformations.
-The current design goal of the project is to make the ML compiler accessible by enabling most
+The most recent version focuses on a cross-level design with TensorIR as the tensor-level representation
+and Relax as the graph-level representation and Python-first transformations.
+The project's current design goal is to make the ML compiler accessible by enabling most
 transformations to be customizable in Python and bringing a cross-level representation that can jointly
-optimize computational graphs, tensor programs, and libraries. The project also serves as a foundation
-infra to build python-first vertical compilers for various domains, such as LLMs.
+optimize computational graphs, tensor programs, and libraries. The project is also a foundation
+infra for building Python-first vertical compilers for domains, such as LLMs.

apps/android_rpc/app/src/main/jni/tvm_runtime.h

Lines changed: 0 additions & 3 deletions
@@ -38,8 +38,6 @@
 #include "../src/runtime/cpu_device_api.cc"
 #include "../src/runtime/dso_library.cc"
 #include "../src/runtime/file_utils.cc"
-#include "../src/runtime/graph_executor/graph_executor.cc"
-#include "../src/runtime/graph_executor/graph_executor_factory.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/logging.cc"
 #include "../src/runtime/memory/memory_manager.cc"
@@ -66,7 +64,6 @@
 #include "../src/runtime/opencl/opencl_device_api.cc"
 #include "../src/runtime/opencl/opencl_module.cc"
 #include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
-#include "../src/runtime/opencl/texture_pool.cc"
 #include "../src/runtime/source_utils.cc"
 #endif

apps/android_rpc/tests/android_rpc_test.py

Lines changed: 7 additions & 52 deletions
@@ -56,37 +56,15 @@ def test_rpc_module():
     tracker = rpc.connect_tracker(tracker_host, tracker_port)
     remote = tracker.request(key, priority=0, session_timeout=60)

-    # Compile the Graph for CPU target
-    s = te.create_schedule(B.op)
-    xo, xi = s[B].split(B.op.axis[0], factor=64)
-    s[B].parallel(xi)
-    s[B].pragma(xo, "parallel_launch_point")
-    s[B].pragma(xi, "parallel_barrier_when_finish")
-    f = tvm.build(s, [A, B], target, name="myadd_cpu")
-    path_dso_cpu = temp.relpath("cpu_lib.so")
-    f.export_library(path_dso_cpu, fcompile=ndk.create_shared)
+    mod = tvm.IRModule.from_expr(te.create_prim_func([A, B]).with_attr("global_symbol", "myadd"))
+    sch = tvm.tir.Schedule(mod)
+    (x,) = sch.get_loops(block=sch.get_block("B"))
+    xo, xi = sch.split(x, [None, 32])
+    sch.bind(xo, "blockIdx.x")
+    sch.bind(xi, "threadIdx.x")

-    # Execute the portable graph on cpu target
-    print("Run CPU test ...")
-    dev = remote.cpu(0)
-    remote.upload(path_dso_cpu)
-    f2 = remote.load_module("cpu_lib.so")
-    a = tvm.nd.array(a_np, dev)
-    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
-    time_f = f2.time_evaluator(f2.entry_name, dev, number=10)
-    cost = time_f(a, b).mean
-    print("%g secs/op\n" % cost)
-    np.testing.assert_equal(b.numpy(), a.numpy() + 1)
-
-    # Compile the Graph for OpenCL target
     if test_opencl:
-        s = te.create_schedule(B.op)
-        xo, xi = s[B].split(B.op.axis[0], factor=64)
-        s[B].bind(xi, te.thread_axis("threadIdx.x"))
-        s[B].bind(xo, te.thread_axis("blockIdx.x"))
-        # Build the dynamic lib.
-        # If we don't want to do metal and only use cpu, just set target to be target
-        f = tvm.build(s, [A, B], tvm.target.Target("opencl", host=target), name="myadd")
+        f = tvm.build(sch.mod, target=tvm.target.Target("opencl", host=target))
         path_dso_cl = temp.relpath("dev_lib_cl.so")
         f.export_library(path_dso_cl, fcompile=ndk.create_shared)

@@ -101,29 +79,6 @@ def test_rpc_module():
         print("%g secs/op\n" % cost)
         np.testing.assert_equal(b.numpy(), a.numpy() + 1)

-    # Compile the Graph for Vulkan target
-    if test_vulkan:
-        s = te.create_schedule(B.op)
-        xo, xi = s[B].split(B.op.axis[0], factor=64)
-        s[B].bind(xi, te.thread_axis("threadIdx.x"))
-        s[B].bind(xo, te.thread_axis("blockIdx.x"))
-        # Build the dynamic lib.
-        # If we don't want to do metal and only use cpu, just set target to be target
-        f = tvm.build(s, [A, B], tvm.target.Target("vulkan", host=target), name="myadd")
-        path_dso_vulkan = temp.relpath("dev_lib_vulkan.so")
-        f.export_library(path_dso_vulkan, fcompile=ndk.create_shared)
-
-        print("Run GPU(Vulkan Flavor) test ...")
-        dev = remote.vulkan(0)
-        remote.upload(path_dso_vulkan)
-        f1 = remote.load_module("dev_lib_vulkan.so")
-        a = tvm.nd.array(a_np, dev)
-        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
-        time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
-        cost = time_f(a, b).mean
-        print("%g secs/op\n" % cost)
-        np.testing.assert_equal(b.numpy(), a.numpy() + 1)
-

 if __name__ == "__main__":
     test_rpc_module()
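
For readers unfamiliar with the TensorIR scheduling API used in the updated test, here is a self-contained sketch of the same pattern run locally without RPC. It is an illustration only: the names A, B, and myadd mirror the test, while the llvm target and the parallel step are assumptions for a host-only run (the test itself binds GPU threads instead).

# A local, non-RPC sketch of the TensorIR scheduling flow used above.
# Assumes an LLVM-enabled TVM build; "llvm" and sch.parallel substitute for
# the OpenCL target and GPU thread bindings in the test.
import numpy as np
import tvm
from tvm import te

A = te.placeholder((1024,), name="A", dtype="float32")
B = te.compute((1024,), lambda i: A[i] + 1.0, name="B")

mod = tvm.IRModule.from_expr(te.create_prim_func([A, B]).with_attr("global_symbol", "myadd"))
sch = tvm.tir.Schedule(mod)
(x,) = sch.get_loops(block=sch.get_block("B"))
xo, xi = sch.split(x, factors=[None, 32])
sch.parallel(xo)  # CPU: parallelize the outer loop instead of binding GPU threads

lib = tvm.build(sch.mod, target="llvm")
a = tvm.nd.array(np.arange(1024, dtype="float32"))
b = tvm.nd.array(np.zeros(1024, dtype="float32"))
lib["myadd"](a, b)
np.testing.assert_allclose(b.numpy(), a.numpy() + 1.0)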
