4 changes: 2 additions & 2 deletions apps/android_rpc/app/src/main/jni/tvm_runtime.h
@@ -43,16 +43,15 @@
#include "../ffi/src/ffi/extra/module.cc"
#include "../ffi/src/ffi/extra/testing.cc"
#include "../ffi/src/ffi/function.cc"
#include "../ffi/src/ffi/ndarray.cc"
#include "../ffi/src/ffi/object.cc"
#include "../ffi/src/ffi/tensor.cc"
#include "../ffi/src/ffi/traceback.cc"
#include "../src/runtime/cpu_device_api.cc"
#include "../src/runtime/device_api.cc"
#include "../src/runtime/file_utils.cc"
#include "../src/runtime/logging.cc"
#include "../src/runtime/memory/memory_manager.cc"
#include "../src/runtime/minrpc/minrpc_logger.cc"
#include "../src/runtime/ndarray.cc"
#include "../src/runtime/profiling.cc"
#include "../src/runtime/registry.cc"
#include "../src/runtime/rpc/rpc_channel.cc"
@@ -63,6 +62,7 @@
#include "../src/runtime/rpc/rpc_server_env.cc"
#include "../src/runtime/rpc/rpc_session.cc"
#include "../src/runtime/rpc/rpc_socket_impl.cc"
#include "../src/runtime/tensor.cc"
#include "../src/runtime/thread_pool.cc"
#include "../src/runtime/threading_backend.cc"
#include "../src/runtime/workspace_pool.cc"
4 changes: 2 additions & 2 deletions apps/android_rpc/tests/android_rpc_test.py
@@ -72,8 +72,8 @@ def test_rpc_module():
dev = remote.cl(0)
remote.upload(path_dso_cl)
f1 = remote.load_module("dev_lib_cl.so")
-a = tvm.nd.array(a_np, dev)
-b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+a = tvm.runtime.tensor(a_np, dev)
+b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
cost = time_f(a, b).mean
print("%g secs/op\n" % cost)
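For context, the updates above are a mechanical rename: `tvm.nd.array` becomes `tvm.runtime.tensor` with identical arguments and device placement. A minimal before/after sketch (the shape and CPU device here are illustrative, not taken from this PR):

```python
import numpy as np
import tvm

dev = tvm.cpu(0)
a_np = np.random.uniform(size=1024).astype("float32")

# Before this PR:
# a = tvm.nd.array(a_np, dev)

# After this PR, the same call goes through the runtime namespace:
a = tvm.runtime.tensor(a_np, dev)
assert a.numpy().shape == (1024,)
```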
2 changes: 1 addition & 1 deletion apps/hexagon_launcher/launcher_core.h
@@ -25,7 +25,7 @@
#include <tvm/ffi/function.h>
#include <tvm/runtime/data_type.h>
#include <tvm/runtime/module.h>
-#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/tensor.h>

#include <string>
#include <vector>
12 changes: 6 additions & 6 deletions apps/hexagon_launcher/launcher_hexagon.cc
@@ -137,7 +137,7 @@ AEEResult __QAIC_HEADER(launcher_rpc_set_input)(remote_handle64 handle, int inpu
};
DLManagedTensor managed{tensor, /*manager_ctx*/ nullptr, /*deleter*/ nullptr};

-auto input = tvm::runtime::NDArray::FromDLPack(&managed);
+auto input = tvm::runtime::Tensor::FromDLPack(&managed);

tvm::ffi::Function set_input = get_module_func(TheModel->model_executor, "set_input");
set_input(input_idx, input);
@@ -172,17 +172,17 @@ AEEResult __QAIC_HEADER(launcher_rpc_get_output)(remote_handle64 handle, int out
}

tvm::ffi::Function get_output = get_module_func(TheModel->model_executor, "get_output");
-tvm::runtime::NDArray output = get_output(output_idx);
+tvm::runtime::Tensor output = get_output(output_idx);

std::vector<int64_t> shape_vec{output->shape, output->shape + output->ndim};

-auto* container = new tvm::runtime::NDArray::Container(
-static_cast<void*>(output_value), shape_vec, output->dtype, Model::external());
+auto* container = new tvm::runtime::Tensor::Container(static_cast<void*>(output_value), shape_vec,
+output->dtype, Model::external());
container->SetDeleter([](tvm::Object* container) {
-delete static_cast<tvm::runtime::NDArray::Container*>(container);
+delete static_cast<tvm::runtime::Tensor::Container*>(container);
});

-tvm::runtime::NDArray host_output(tvm::runtime::GetObjectPtr<tvm::runtime::Object>(container));
+tvm::runtime::Tensor host_output(tvm::runtime::GetObjectPtr<tvm::runtime::Object>(container));

if (meta_size != 0) {
auto* meta = reinterpret_cast<tensor_meta*>(output_meta);
4 changes: 2 additions & 2 deletions apps/ios_rpc/tests/ios_rpc_test.py
@@ -72,8 +72,8 @@ def test_rpc_module(host, port, key, mode):
dev = remote.metal(0)
f1 = remote.load_module("dev_lib.dylib")
a_np = np.random.uniform(size=1024).astype(A.dtype)
-a = tvm.nd.array(a_np, dev)
-b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+a = tvm.runtime.tensor(a_np, dev)
+b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
cost = time_f(a, b).mean
print("Metal: %g secs/op" % cost)
6 changes: 3 additions & 3 deletions docs/arch/index.rst
@@ -133,7 +133,7 @@ The main goal of TVM's runtime is to provide a minimal API for loading and execu
import tvm
# Example runtime execution program in python, with type annotated
mod: tvm.runtime.Module = tvm.runtime.load_module("compiled_artifact.so")
-arr: tvm.runtime.NDArray = tvm.nd.array([1, 2, 3], device=tvm.cuda(0))
+arr: tvm.runtime.Tensor = tvm.runtime.tensor([1, 2, 3], device=tvm.cuda(0))
fun: tvm.runtime.PackedFunc = mod["addone"]
fun(arr)
print(arr.numpy())
@@ -142,7 +142,7 @@ The main goal of TVM's runtime is to provide a minimal API for loading and execu
:py:class:`tvm.runtime.Module` encapsulates the result of compilation. A runtime.Module contains a GetFunction method to obtain PackedFuncs by name.

:py:class:`tvm.runtime.PackedFunc` is a type-erased function interface for both the generated functions. A runtime.PackedFunc can take arguments and return values with the
-following types: POD types(int, float), string, runtime.PackedFunc, runtime.Module, runtime.NDArray, and other sub-classes of runtime.Object.
+following types: POD types(int, float), string, runtime.PackedFunc, runtime.Module, runtime.Tensor, and other sub-classes of runtime.Object.

:py:class:`tvm.runtime.Module` and :py:class:`tvm.runtime.PackedFunc` are powerful mechanisms to modularize the runtime. For example, to get the above `addone` function on CUDA, we can use LLVM to generate the host-side code to compute the launching parameters(e.g. size of the thread groups) and then call into another PackedFunc from a CUDAModule that is backed by the CUDA driver API. The same mechanism can be used for OpenCL kernels.

@@ -155,7 +155,7 @@ The above example only deals with a simple `addone` function. The code snippet b
factory: tvm.runtime.Module = tvm.runtime.load_module("resnet18.so")
# Create a stateful graph execution module for resnet18 on cuda(0)
gmod: tvm.runtime.Module = factory["resnet18"](tvm.cuda(0))
-data: tvm.runtime.NDArray = get_input_data()
+data: tvm.runtime.Tensor = get_input_data()
# set input
gmod["set_input"](0, data)
# execute the model
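The hunk above is truncated after the `run` step. As a hedged sketch of how the stateful pattern typically completes (assuming the executor module exposes `get_output` alongside the `set_input` and `run` functions shown in the docs; the input shape is illustrative):

```python
import numpy as np
import tvm

factory: tvm.runtime.Module = tvm.runtime.load_module("resnet18.so")
# Create a stateful graph execution module for resnet18 on cuda(0)
gmod: tvm.runtime.Module = factory["resnet18"](tvm.cuda(0))
data: tvm.runtime.Tensor = tvm.runtime.tensor(
    np.random.rand(1, 3, 224, 224).astype("float32"), tvm.cuda(0)
)
gmod["set_input"](0, data)  # set input
gmod["run"]()               # execute the model
out: tvm.runtime.Tensor = gmod["get_output"](0)  # fetch the result as a Tensor
print(out.numpy().shape)
```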
6 changes: 3 additions & 3 deletions docs/deep_dive/tensor_ir/tutorials/tir_creation.py
@@ -204,9 +204,9 @@ def mm_relu(a: T.handle, b: T.handle, c: T.handle):


def evaluate_dynamic_shape(lib: tvm.runtime.Module, m: int, n: int, k: int):
-A = tvm.nd.array(np.random.uniform(size=(m, k)).astype("float32"))
-B = tvm.nd.array(np.random.uniform(size=(k, n)).astype("float32"))
-C = tvm.nd.array(np.zeros((m, n), dtype="float32"))
+A = tvm.runtime.tensor(np.random.uniform(size=(m, k)).astype("float32"))
+B = tvm.runtime.tensor(np.random.uniform(size=(k, n)).astype("float32"))
+C = tvm.runtime.tensor(np.zeros((m, n), dtype="float32"))
lib(A, B, C)
return C.numpy()

6 changes: 3 additions & 3 deletions docs/deep_dive/tensor_ir/tutorials/tir_transformation.py
@@ -72,9 +72,9 @@ def main(
b_np = np.random.uniform(size=(128, 128)).astype("float32")
c_np = a_np @ b_np

-a_nd = tvm.nd.array(a_np)
-b_nd = tvm.nd.array(b_np)
-c_nd = tvm.nd.array(np.zeros((128, 128), dtype="float32"))
+a_nd = tvm.runtime.tensor(a_np)
+b_nd = tvm.runtime.tensor(b_np)
+c_nd = tvm.runtime.tensor(np.zeros((128, 128), dtype="float32"))


def evaluate(mod: tvm.IRModule):
6 changes: 3 additions & 3 deletions docs/get_started/tutorials/ir_module.py
@@ -237,7 +237,7 @@ def main(
vm = relax.VirtualMachine(exec, dev)

raw_data = np.random.rand(1, 784).astype("float32")
-data = tvm.nd.array(raw_data, dev)
+data = tvm.runtime.tensor(raw_data, dev)
cpu_out = vm["main"](data, *params_from_torch["main"]).numpy()
print(cpu_out)

@@ -267,8 +267,8 @@ def main(
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(exec, dev)
# Need to allocate data and params on GPU device
-data = tvm.nd.array(raw_data, dev)
-gpu_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
+data = tvm.runtime.tensor(raw_data, dev)
+gpu_params = [tvm.runtime.tensor(p, dev) for p in params_from_torch["main"]]
gpu_out = vm["main"](data, *gpu_params).numpy()
print(gpu_out)

16 changes: 8 additions & 8 deletions docs/get_started/tutorials/quick_start.py
@@ -141,9 +141,9 @@ def forward(self, x):
device = tvm.cpu()
vm = relax.VirtualMachine(ex, device)
data = np.random.rand(1, 784).astype("float32")
-tvm_data = tvm.nd.array(data, device=device)
+tvm_data = tvm.runtime.tensor(data, device=device)
params = [np.random.rand(*param.shape).astype("float32") for _, param in param_spec]
-params = [tvm.nd.array(param, device=device) for param in params]
+params = [tvm.runtime.tensor(param, device=device) for param in params]
print(vm["forward"](tvm_data, *params).numpy())

################################################################################
@@ -158,14 +158,14 @@ def forward(self, x):
# prefill_logits = vm["prefill"](inputs, weight, kv_cache)
# decoded_logits = vm["decode"](inputs, weight, kv_cache)
#
-# - TVM runtime comes with native data structures, such as NDArray, can also have zero
+# - TVM runtime comes with native data structures, such as Tensor, can also have zero
# copy exchange with existing ecosystem (DLPack exchange with PyTorch)
#
# .. code-block:: Python
#
-# # Convert PyTorch tensor to TVM NDArray
-# x_tvm = tvm.nd.from_dlpack(x_torch.to_dlpack())
-# # Convert TVM NDArray to PyTorch tensor
+# # Convert PyTorch tensor to TVM Tensor
+# x_tvm = tvm.runtime.from_dlpack(x_torch.to_dlpack())
+# # Convert TVM Tensor to PyTorch tensor
# x_torch = torch.from_dlpack(x_tvm.to_dlpack())
#
# - TVM runtime works in non-python environments, so it works on settings such as mobile
@@ -175,14 +175,14 @@
# // C++ snippet
# runtime::Module vm = ex.GetFunction("load_executable")();
# vm.GetFunction("init")(...);
-# NDArray out = vm.GetFunction("prefill")(data, weight, kv_cache);
+# Tensor out = vm.GetFunction("prefill")(data, weight, kv_cache);
#
# .. code-block:: Java
#
# // Java snippet
# Module vm = ex.getFunction("load_executable").invoke();
# vm.getFunction("init").pushArg(...).invoke;
-# NDArray out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
+# Tensor out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
#

################################################################################
8 changes: 4 additions & 4 deletions docs/how_to/tutorials/cross_compilation_and_rpc.py
@@ -182,8 +182,8 @@

# create arrays on the remote device
dev = remote.cpu()
-a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
-b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+a = tvm.runtime.tensor(np.random.uniform(size=1024).astype(A.dtype), dev)
+b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
# the function will run on the remote device
func(a, b)
np.testing.assert_equal(b.numpy(), a.numpy() + 1)
@@ -249,8 +249,8 @@ def run_opencl():

# run
dev = remote.cl()
-a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
-b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+a = tvm.runtime.tensor(np.random.uniform(size=1024).astype(A.dtype), dev)
+b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
func(a, b)
np.testing.assert_equal(b.numpy(), a.numpy() + 1)
print("OpenCL test passed!")
4 changes: 2 additions & 2 deletions docs/how_to/tutorials/customize_opt.py
@@ -209,8 +209,8 @@ def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassContext) -> IR
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(ex, dev)
# Need to allocate data and params on GPU device
-data = tvm.nd.array(np.random.rand(*input_shape).astype("float32"), dev)
-gpu_params = [tvm.nd.array(np.random.rand(*p.shape).astype(p.dtype), dev) for _, p in params]
+data = tvm.runtime.tensor(np.random.rand(*input_shape).astype("float32"), dev)
+gpu_params = [tvm.runtime.tensor(np.random.rand(*p.shape).astype(p.dtype), dev) for _, p in params]
gpu_out = vm["forward"](data, *gpu_params).numpy()
print(gpu_out)

4 changes: 2 additions & 2 deletions docs/how_to/tutorials/e2e_opt_model.py
@@ -117,8 +117,8 @@
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(ex, dev)
# Need to allocate data and params on GPU device
-gpu_data = tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32"), dev)
-gpu_params = [tvm.nd.array(p, dev) for p in params["main"]]
+gpu_data = tvm.runtime.tensor(np.random.rand(1, 3, 224, 224).astype("float32"), dev)
+gpu_params = [tvm.runtime.tensor(p, dev) for p in params["main"]]
gpu_out = vm["main"](gpu_data, *gpu_params).numpy()

print(gpu_out.shape)
6 changes: 3 additions & 3 deletions docs/how_to/tutorials/optimize_llm.py
@@ -489,7 +489,7 @@ def _pipeline(mod: tvm.ir.IRModule, _ctx: tvm.transform.PassContext) -> tvm.ir.I

# Convert params into ndarray
params = [
-tvm.nd.array(param_dict[k].astype("float16"), device=dev) for k in named_params.keys()
+tvm.runtime.tensor(param_dict[k].astype("float16"), device=dev) for k in named_params.keys()
]


@@ -523,7 +523,7 @@ def _pipeline(mod: tvm.ir.IRModule, _ctx: tvm.transform.PassContext) -> tvm.ir.I
input_len = len(prompt)

# Load prompt tokens into TVM ndarray on the target device
-tokens = tvm.nd.array(np.array(prompt).astype("int32"), device=dev)
+tokens = tvm.runtime.tensor(np.array(prompt).astype("int32"), device=dev)

######################################################################
# Create the KVCache
@@ -609,7 +609,7 @@ def sample_token(logits):
print("The generated token:")

while last_token != tokenizer.eos_token_id:
-tokens = tvm.nd.array(np.array([last_token]).astype("int32"), device=dev)
+tokens = tvm.runtime.tensor(np.array([last_token]).astype("int32"), device=dev)
hidden_states = embed(tokens, params)
begin_forward_func(kv_cache, ShapeTuple([seq_id]), ShapeTuple([1]))
logits, kv_cache = vm["decode"](hidden_states, kv_cache, params)
1 change: 0 additions & 1 deletion docs/reference/api/python/index.rst
@@ -34,7 +34,6 @@ Python API
:caption: tvm.runtime

runtime/runtime
-runtime/ndarray
runtime/vm
runtime/disco
runtime/profiling
21 changes: 0 additions & 21 deletions docs/reference/api/python/runtime/ndarray.rst

This file was deleted.

1 change: 0 additions & 1 deletion docs/reference/api/python/runtime/runtime.rst
@@ -19,4 +19,3 @@ tvm.runtime
-----------
.. automodule:: tvm.runtime
:members:
-:exclude-members: NDArray
4 changes: 2 additions & 2 deletions ffi/CMakeLists.txt
@@ -57,7 +57,7 @@ set(tvm_ffi_objs_sources
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/object.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/error.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/function.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/ndarray.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/tensor.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/dtype.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/container.cc"
)
@@ -189,7 +189,7 @@ if (TVM_FFI_BUILD_PYTHON_MODULE)
${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/dtype.pxi
${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/error.pxi
${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/function.pxi
-${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/ndarray.pxi
+${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/tensor.pxi
${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/object.pxi
${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/string.pxi
)
1 change: 1 addition & 0 deletions ffi/docs/.gitignore
@@ -1 +1,2 @@
_build
+**/generated/*.rst
6 changes: 5 additions & 1 deletion ffi/docs/Makefile
@@ -25,7 +25,7 @@ BUILDDIR = _build
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

-.PHONY: help Makefile livehtml
+.PHONY: help Makefile livehtml clean

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
@@ -34,3 +34,7 @@ help:

livehtml:
@sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+clean:
+	rm -rf $(BUILDDIR)
+	rm -rf reference/python/generated
12 changes: 6 additions & 6 deletions ffi/docs/concepts/abi_overview.md
@@ -219,17 +219,17 @@ typedef struct TVMFFIObject {
- `deleter` ensures that objects allocated from one language/runtime can be safely deleted in another.

The object format provides a unified way to manage object life-cycle and dynamic type casting
-for heap-allocated objects, including Shape, NDArray,
+for heap-allocated objects, including Shape, Tensor,
Function, Array, Map and other custom objects.


-### DLPack Compatible NDArray
+### DLPack Compatible Tensor

-We provide first-class support for DLPack raw unmanaged pointer support as well as a managed NDArray object that
-directly adopts the DLPack DLTensor layout. The overall layout of the NDArray object is as follows:
+We provide first-class support for DLPack raw unmanaged pointer support as well as a managed Tensor object that
+directly adopts the DLPack DLTensor layout. The overall layout of the Tensor object is as follows:

```c++
-struct NDArrayObj: public ffi::Object, public DLTensor {
+struct TensorObj: public ffi::Object, public DLTensor {
};
```

@@ -241,7 +241,7 @@ DLTensor* ReadDLTensorPtr(const TVMFFIAny *value) {
if (value->type_index == kTVMFFIDLTensorPtr) {
return static_cast<DLTensor*>(value->v_ptr);
}
-assert(value->type_index == kTVMFFINDArray);
+assert(value->type_index == kTVMFFITensor);
return reinterpret_cast<DLTensor*>(
reinterpret_cast<char*>(value->v_obj) + sizeof(TVMFFIObject));
}
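Because `TensorObj` embeds the `DLTensor` layout directly, DLPack exchange needs no copy in either direction. A small Python sketch of that round trip, assuming the `to_dlpack`/`from_dlpack` calls behave as in the quick-start snippet earlier in this PR:

```python
import numpy as np
import tvm

# Create a Tensor, export it as a DLPack capsule, and re-import it.
x = tvm.runtime.tensor(np.arange(4, dtype="float32"))
capsule = x.to_dlpack()               # zero-copy export
y = tvm.runtime.from_dlpack(capsule)  # zero-copy import
print(y.numpy())  # [0. 1. 2. 3.]
```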
3 changes: 3 additions & 0 deletions ffi/docs/conf.py
@@ -20,6 +20,9 @@

import tomli


+os.environ["TVM_FFI_BUILD_DOCS"] = "1"

# -- General configuration ------------------------------------------------

# Load version from pyproject.toml
Expand Down