
Commit 506386d

Merge branch 'apache:main' into generic-attention
2 parents 7569674 + a531d17 commit 506386d

File tree: 2,143 files changed (+3660 / -521447 lines)


CMakeLists.txt

Lines changed: 13 additions & 98 deletions
@@ -57,20 +57,13 @@ tvm_option(USE_THREADS "Build with thread support" ON)
 tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF)
 tvm_option(USE_MLIR "Build with MLIR support" OFF)
 tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
-tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
-tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
-tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
-tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
 tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
-tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
 tvm_option(TVM_DEBUG_WITH_ABI_CHANGE "Enable debug code that may cause ABI changes" OFF)
 tvm_option(TVM_LOG_BEFORE_THROW "Whether log before throw, for debugging purposes" OFF)
 tvm_option(USE_RTTI "Build with RTTI" ON)
 tvm_option(USE_MSVC_MT "Build with MT" OFF)
 tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF)
 tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF)
-tvm_option(USE_TF_TVMDSOOP "Build with TensorFlow TVMDSOOp" OFF)
-tvm_option(USE_PT_TVMDSOOP "Build with PyTorch TVMDSOOp" OFF)
 tvm_option(USE_FALLBACK_STL_MAP "Use TVM's POD compatible Map" OFF)
 tvm_option(INDEX_DEFAULT_I64 "Defaults the index datatype to int64" ON)
 tvm_option(USE_LIBBACKTRACE "Use libbacktrace to supply linenumbers on stack traces" AUTO)
@@ -335,34 +328,6 @@ tvm_file_glob(GLOB CODEGEN_SRCS

 list(APPEND COMPILER_SRCS ${CODEGEN_SRCS})

-tvm_file_glob(GLOB_RECURSE RELAY_OP_SRCS
-    src/relay/op/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_PASS_SRCS
-    src/relay/analysis/*.cc
-    src/relay/collage/*.cc
-    src/relay/transforms/*.cc
-    src/relay/quantize/*.cc
-    )
-tvm_file_glob(GLOB RELAY_BACKEND_SRCS
-    src/relay/backend/*.cc
-    src/relay/backend/vm/*.cc
-    src/relay/backend/aot/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_IR_SRCS
-    src/relay/ir/*.cc
-    src/relay/printer/*.cc
-    src/relay/parser/*.cc
-    )
-tvm_file_glob(GLOB_RECURSE RELAY_QNN_SRCS
-    src/relay/qnn/*.cc
-    )
-list(APPEND COMPILER_SRCS ${RELAY_OP_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_PASS_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS})
-list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS})
-
 tvm_file_glob(GLOB DATATYPE_SRCS src/target/datatype/*.cc)
 list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
 list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc")
@@ -418,51 +383,6 @@ else()
 list(APPEND COMPILER_SRCS ${STACKVM_RUNTIME_SRCS})
 endif(USE_STACKVM_RUNTIME)

-# NOTE(areusch): USE_GRAPH_RUNTIME will be deleted in a future release
-if(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
-  message(WARNING "USE_GRAPH_RUNTIME renamed to USE_GRAPH_EXECUTOR. Please update your config.cmake")
-  set(USE_GRAPH_EXECUTOR ${USE_GRAPH_RUNTIME})
-  unset(USE_GRAPH_RUNTIME CACHE)
-endif(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR)
-
-# NOTE(areusch): USE_GRAPH_RUNTIME_DEBUG will be deleted in a future release
-if(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_PROFILER)
-  message(WARNING "USE_GRAPH_RUNTIME_DEBUG renamed to USE_PROFILER. Please update your config.cmake")
-  set(USE_PROFILER ${USE_GRAPH_RUNTIME_DEBUG})
-  unset(USE_GRAPH_RUNTIME_DEBUG CACHE)
-endif(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_PROFILER)
-
-if(USE_GRAPH_EXECUTOR)
-  message(STATUS "Build with Graph Executor support...")
-  tvm_file_glob(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS})
-
-endif(USE_GRAPH_EXECUTOR)
-
-# convert old options for profiler
-if(USE_GRAPH_EXECUTOR_DEBUG)
-  message(WARNING "USE_GRAPH_EXECUTOR_DEBUG renamed to USE_PROFILER. Please update your config.cmake")
-  unset(USE_GRAPH_EXECUTOR_DEBUG CACHE)
-  set(USE_PROFILER ON)
-endif()
-if(USE_VM_PROFILER)
-  message(WARNING "USE_VM_PROFILER renamed to USE_PROFILER. Please update your config.cmake")
-  unset(USE_VM_PROFILER CACHE)
-  set(USE_PROFILER ON)
-endif()
-
-if(USE_PROFILER)
-  message(STATUS "Build with profiler...")
-
-  tvm_file_glob(GLOB RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS src/runtime/graph_executor/debug/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS})
-  set_source_files_properties(${RUNTIME_GRAPH_EXECUTOR_SRCS}
-    PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_EXECUTOR_DEBUG")
-
-  tvm_file_glob(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
-endif(USE_PROFILER)
-
 if(USE_CUDA AND USE_NCCL)
   message(STATUS "Build with NCCL...")
   find_nccl(${USE_NCCL})
@@ -493,13 +413,6 @@ if(USE_ROCM AND USE_RCCL)
   list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC})
 endif()

-if(USE_AOT_EXECUTOR)
-  message(STATUS "Build with AOT Executor support...")
-  file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})
-
-endif(USE_AOT_EXECUTOR)
-
 # Enable ctest if gtest is available
 if(USE_GTEST)
   # Check env var for backward compatibility. A better way to specify package
@@ -538,12 +451,6 @@ if(USE_GTEST)
   endif()
 endif()

-if(USE_PIPELINE_EXECUTOR)
-  message(STATUS "Build with Pipeline Executor support...")
-  tvm_file_glob(GLOB RUNTIME_PIPELINE_SRCS src/runtime/pipeline/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_PIPELINE_SRCS})
-endif(USE_PIPELINE_EXECUTOR)
-
 if(USE_KALLOC_ALIGNMENT)
   message(STATUS "Build Alloc alignment set to ${USE_KALLOC_ALIGNMENT}")
   add_definitions(-DTVM_KALLOC_ALIGNMENT=${USE_KALLOC_ALIGNMENT})
@@ -576,7 +483,6 @@ include(cmake/modules/Metal.cmake)
 include(cmake/modules/ROCM.cmake)
 include(cmake/modules/LLVM.cmake)
 include(cmake/modules/contrib/BLAS.cmake)
-include(cmake/modules/contrib/CODEGENC.cmake)
 include(cmake/modules/contrib/DNNL.cmake)
 include(cmake/modules/contrib/AMX.cmake)
 include(cmake/modules/contrib/CUTLASS.cmake)
@@ -587,10 +493,7 @@ include(cmake/modules/contrib/MSCCLPP.cmake)
 include(cmake/modules/contrib/Sort.cmake)
 include(cmake/modules/contrib/NNPack.cmake)
 include(cmake/modules/contrib/LibTorch.cmake)
-include(cmake/modules/contrib/HybridDump.cmake)
 include(cmake/modules/contrib/TFLite.cmake)
-include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
-include(cmake/modules/contrib/PT_TVMDSOOP.cmake)
 include(cmake/modules/contrib/CoreML.cmake)
 include(cmake/modules/contrib/BNNS.cmake)
 include(cmake/modules/contrib/ONNX.cmake)
@@ -604,7 +507,6 @@ include(cmake/modules/contrib/MSC.cmake)
 include(cmake/modules/contrib/vllm.cmake)
 include(cmake/modules/Git.cmake)
 include(cmake/modules/LibInfo.cmake)
-include(cmake/modules/RustExt.cmake)
 include(cmake/modules/contrib/Mrvl.cmake)

 set(LIBINFO_FILE ${CMAKE_CURRENT_LIST_DIR}/src/support/libinfo.cc)
@@ -819,6 +721,19 @@ endif()
 # Custom targets
 add_custom_target(runtime DEPENDS tvm_runtime)

+# By default add cython to all build
+find_package(Python)
+if(NOT DEFINED ENV{CONDA_BUILD})
+  message(STATUS ${CMAKE_CURRENT_BINARY_DIR})
+  add_custom_target(
+    tvm_cython ALL
+    ${Python_EXECUTABLE} setup.py build_ext --inplace
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python
+  )
+  add_dependencies(tvm_cython tvm)
+  message("Add Cython build into the default build step")
+endif()
+
 # Installation rules
 install(TARGETS tvm EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
 install(TARGETS tvm_runtime EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
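
The new `tvm_cython` target in the last hunk simply shells out to the Python FFI build in `python/`. A minimal sketch of the equivalent manual step (an assumption, not part of the commit; it presumes you run it from the TVM source root) would be:

# Hypothetical manual equivalent of the tvm_cython custom target:
# build the Cython FFI extension in-place under python/.
import pathlib
import subprocess
import sys

tvm_root = pathlib.Path(".")  # assumption: current directory is the TVM source root
subprocess.check_call(
    [sys.executable, "setup.py", "build_ext", "--inplace"],
    cwd=tvm_root / "python",
)

When the extension is not built (for example under conda builds, where CONDA_BUILD is set and the target is skipped), the TVM Python package falls back to its ctypes FFI.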

Makefile

Lines changed: 1 addition & 2 deletions
@@ -21,7 +21,6 @@
 lint pylint cpplint \
 cppdoc docs \
 web webclean \
-cython cython3 cyclean \
 clean

 .SECONDEXPANSION:
@@ -128,7 +127,7 @@ webclean:

 # JVM build rules
 INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include
-PKG_CFLAGS = -std=c++11 -Wall -O2 $(INCLUDE_FLAGS) -fPIC
+PKG_CFLAGS = -Wall -O3 $(INCLUDE_FLAGS) -fPIC
 PKG_LDFLAGS =

 ifeq ($(OS),Windows_NT)

README.md

Lines changed: 13 additions & 13 deletions
@@ -23,8 +23,8 @@
 [Release Notes](NEWS.md)

 Apache TVM is a compiler stack for deep learning systems. It is designed to close the gap between the
-productivity-focused deep learning frameworks, and the performance- and efficiency-focused hardware backends.
-TVM works with deep learning frameworks to provide end to end compilation to different backends.
+productivity-focused deep learning frameworks and the performance- and efficiency-focused hardware backends.
+TVM works with deep learning frameworks to provide end-to-end compilation for different backends.

 License
 -------
@@ -33,31 +33,31 @@ TVM is licensed under the [Apache-2.0](LICENSE) license.
 Getting Started
 ---------------
 Check out the [TVM Documentation](https://tvm.apache.org/docs/) site for installation instructions, tutorials, examples, and more.
-The [Getting Started with TVM](https://tvm.apache.org/docs/tutorial/introduction.html) tutorial is a great
+The [Getting Started with TVM](https://tvm.apache.org/docs/get_started/overview.html) tutorial is a great
 place to start.

 Contribute to TVM
 -----------------
-TVM adopts apache committer model, we aim to create an open source project that is maintained and owned by the community.
+TVM adopts the Apache committer model. We aim to create an open-source project maintained and owned by the community.
 Check out the [Contributor Guide](https://tvm.apache.org/docs/contribute/).

 History and Acknowledgement
 ---------------------------
-TVM started as a research project for deep learning compiler.
-The first version of the project benefited a lot from following projects:
+TVM started as a research project for deep learning compilation.
+The first version of the project benefited a lot from the following projects:

 - [Halide](https://github.com/halide/Halide): Part of TVM's TIR and arithmetic simplification module
-  originates from Halide. We also learned and adapted some part of lowering pipeline from Halide.
+  originates from Halide. We also learned and adapted some parts of the lowering pipeline from Halide.
 - [Loopy](https://github.com/inducer/loopy): use of integer set analysis and its loop transformation primitives.
 - [Theano](https://github.com/Theano/Theano): the design inspiration of symbolic scan operator for recurrence.

 Since then, the project has gone through several rounds of redesigns.
 The current design is also drastically different from the initial design, following the
-development trend of ML compiler community.
+development trend of the ML compiler community.

-The most recent version focuses on a cross-level design with TensorIR as tensor-level representation
-and Relax as graph level representation, and python-first transformations.
-The current design goal of the project is to make the ML compiler accessible by enabling most
+The most recent version focuses on a cross-level design with TensorIR as the tensor-level representation
+and Relax as the graph-level representation and Python-first transformations.
+The project's current design goal is to make the ML compiler accessible by enabling most
 transformations to be customizable in Python and bringing a cross-level representation that can jointly
-optimize computational graphs, tensor programs, and libraries. The project also serves as a foundation
-infra to build python-first vertical compilers for various domains, such as LLMs.
+optimize computational graphs, tensor programs, and libraries. The project is also a foundation
+infra for building Python-first vertical compilers for domains, such as LLMs.

apps/android_rpc/app/src/main/jni/tvm_runtime.h

Lines changed: 0 additions & 3 deletions
@@ -38,8 +38,6 @@
 #include "../src/runtime/cpu_device_api.cc"
 #include "../src/runtime/dso_library.cc"
 #include "../src/runtime/file_utils.cc"
-#include "../src/runtime/graph_executor/graph_executor.cc"
-#include "../src/runtime/graph_executor/graph_executor_factory.cc"
 #include "../src/runtime/library_module.cc"
 #include "../src/runtime/logging.cc"
 #include "../src/runtime/memory/memory_manager.cc"
@@ -66,7 +64,6 @@
 #include "../src/runtime/opencl/opencl_device_api.cc"
 #include "../src/runtime/opencl/opencl_module.cc"
 #include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
-#include "../src/runtime/opencl/texture_pool.cc"
 #include "../src/runtime/source_utils.cc"
 #endif

apps/android_rpc/tests/android_rpc_test.py

Lines changed: 7 additions & 52 deletions
@@ -56,37 +56,15 @@ def test_rpc_module():
     tracker = rpc.connect_tracker(tracker_host, tracker_port)
     remote = tracker.request(key, priority=0, session_timeout=60)

-    # Compile the Graph for CPU target
-    s = te.create_schedule(B.op)
-    xo, xi = s[B].split(B.op.axis[0], factor=64)
-    s[B].parallel(xi)
-    s[B].pragma(xo, "parallel_launch_point")
-    s[B].pragma(xi, "parallel_barrier_when_finish")
-    f = tvm.build(s, [A, B], target, name="myadd_cpu")
-    path_dso_cpu = temp.relpath("cpu_lib.so")
-    f.export_library(path_dso_cpu, fcompile=ndk.create_shared)
+    mod = tvm.IRModule.from_expr(te.create_prim_func([A, B]).with_attr("global_symbol", "myadd"))
+    sch = tvm.tir.Schedule(mod)
+    (x,) = sch.get_loops(block=sch.get_block("B"))
+    xo, xi = sch.split(x, [None, 32])
+    sch.bind(xo, "blockIdx.x")
+    sch.bind(xi, "threadIdx.x")

-    # Execute the portable graph on cpu target
-    print("Run CPU test ...")
-    dev = remote.cpu(0)
-    remote.upload(path_dso_cpu)
-    f2 = remote.load_module("cpu_lib.so")
-    a = tvm.nd.array(a_np, dev)
-    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
-    time_f = f2.time_evaluator(f2.entry_name, dev, number=10)
-    cost = time_f(a, b).mean
-    print("%g secs/op\n" % cost)
-    np.testing.assert_equal(b.numpy(), a.numpy() + 1)
-
-    # Compile the Graph for OpenCL target
     if test_opencl:
-        s = te.create_schedule(B.op)
-        xo, xi = s[B].split(B.op.axis[0], factor=64)
-        s[B].bind(xi, te.thread_axis("threadIdx.x"))
-        s[B].bind(xo, te.thread_axis("blockIdx.x"))
-        # Build the dynamic lib.
-        # If we don't want to do metal and only use cpu, just set target to be target
-        f = tvm.build(s, [A, B], tvm.target.Target("opencl", host=target), name="myadd")
+        f = tvm.build(sch.mod, target=tvm.target.Target("opencl", host=target))
         path_dso_cl = temp.relpath("dev_lib_cl.so")
         f.export_library(path_dso_cl, fcompile=ndk.create_shared)

@@ -101,29 +79,6 @@ def test_rpc_module():
         print("%g secs/op\n" % cost)
         np.testing.assert_equal(b.numpy(), a.numpy() + 1)

-    # Compile the Graph for Vulkan target
-    if test_vulkan:
-        s = te.create_schedule(B.op)
-        xo, xi = s[B].split(B.op.axis[0], factor=64)
-        s[B].bind(xi, te.thread_axis("threadIdx.x"))
-        s[B].bind(xo, te.thread_axis("blockIdx.x"))
-        # Build the dynamic lib.
-        # If we don't want to do metal and only use cpu, just set target to be target
-        f = tvm.build(s, [A, B], tvm.target.Target("vulkan", host=target), name="myadd")
-        path_dso_vulkan = temp.relpath("dev_lib_vulkan.so")
-        f.export_library(path_dso_vulkan, fcompile=ndk.create_shared)
-
-        print("Run GPU(Vulkan Flavor) test ...")
-        dev = remote.vulkan(0)
-        remote.upload(path_dso_vulkan)
-        f1 = remote.load_module("dev_lib_vulkan.so")
-        a = tvm.nd.array(a_np, dev)
-        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
-        time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
-        cost = time_f(a, b).mean
-        print("%g secs/op\n" % cost)
-        np.testing.assert_equal(b.numpy(), a.numpy() + 1)
-

 if __name__ == "__main__":
     test_rpc_module()
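
For readers unfamiliar with the TensorIR scheduling API used in the updated test, here is a self-contained sketch of the same pattern run locally without RPC. It is an illustration only: the names A, B, and myadd mirror the test, while the llvm target and the parallel step are assumptions for a host-only run (the test itself binds GPU threads instead).

# A local, non-RPC sketch of the TensorIR scheduling flow used above.
# Assumes an LLVM-enabled TVM build; "llvm" and sch.parallel substitute for
# the OpenCL target and GPU thread bindings in the test.
import numpy as np
import tvm
from tvm import te

A = te.placeholder((1024,), name="A", dtype="float32")
B = te.compute((1024,), lambda i: A[i] + 1.0, name="B")

mod = tvm.IRModule.from_expr(te.create_prim_func([A, B]).with_attr("global_symbol", "myadd"))
sch = tvm.tir.Schedule(mod)
(x,) = sch.get_loops(block=sch.get_block("B"))
xo, xi = sch.split(x, factors=[None, 32])
sch.parallel(xo)  # CPU: parallelize the outer loop instead of binding GPU threads

lib = tvm.build(sch.mod, target="llvm")
a = tvm.nd.array(np.arange(1024, dtype="float32"))
b = tvm.nd.array(np.zeros(1024, dtype="float32"))
lib["myadd"](a, b)
np.testing.assert_allclose(b.numpy(), a.numpy() + 1.0)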
