# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import platform
import unittest
from typing import TYPE_CHECKING

import numpy as np
import tvm_ffi.cpp

import paddle

if TYPE_CHECKING:
    from tvm_ffi import Module

1929
20- class TestTVMFFI (unittest .TestCase ):
30+ class TestTVMFFIEnvStream (unittest .TestCase ):
2131 def test_tvm_ffi_env_stream_for_gpu_tensor (self ):
2232 if not paddle .is_compiled_with_cuda ():
2333 return
@@ -34,5 +44,113 @@ def test_tvm_ffi_env_stream_for_cpu_tensor(self):
3444 tensor .__tvm_ffi_env_stream__ ()
3545
3646
class TestCDLPackExchangeAPI(unittest.TestCase):
    """Exercise the C DLPack exchange API between paddle tensors and tvm_ffi.

    Each test JIT-compiles a small C++/CUDA kernel with
    ``tvm_ffi.cpp.load_inline`` and passes paddle tensors to it through the
    DLPack exchange protocol.  Platform-specific cases use ``self.skipTest``
    (rather than a bare ``return``) so skipped configurations are reported as
    skipped instead of silently passing.
    """

    def test_c_dlpack_exchange_api_cpu(self):
        # CPU kernel: element-wise "add one" over a 1-D float32 tensor,
        # written against the tvm_ffi TensorView C++ API.
        cpp_source = r"""
        void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
          // implementation of a library function
          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
          DLDataType f32_dtype{kDLFloat, 32, 1};
          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
          TVM_FFI_ICHECK(y->ndim == 1) << "y must be a 1D tensor";
          TVM_FFI_ICHECK(y->dtype == f32_dtype) << "y must be a float tensor";
          TVM_FFI_ICHECK(x->shape[0] == y->shape[0]) << "x and y must have the same shape";
          for (int i = 0; i < x->shape[0]; ++i) {
            static_cast<float*>(y->data)[i] = static_cast<float*>(x->data)[i] + 1;
          }
        }
        """

        # Pass `functions` as a list for consistency with the other tests in
        # this class.
        mod: Module = tvm_ffi.cpp.load_inline(
            name='mod', cpp_sources=cpp_source, functions=['add_one_cpu']
        )

        x = paddle.full((3,), 1.0, dtype='float32').cpu()
        y = paddle.zeros((3,), dtype='float32').cpu()
        mod.add_one_cpu(x, y)
        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])

    def test_c_dlpack_exchange_api_gpu(self):
        if not paddle.is_compiled_with_cuda():
            self.skipTest('paddle is not compiled with CUDA')
        if paddle.is_compiled_with_rocm():
            # Skip on DCU because CUDA_HOME is not available
            self.skipTest('CUDA_HOME is not available on DCU')
        if platform.system() == "Windows":
            # Temporary skip this test case on windows because compile bug on TVM FFI
            self.skipTest('TVM FFI compile bug on Windows')
        cpp_sources = r"""
        void add_one_cuda(tvm::ffi::TensorView x, tvm::ffi::TensorView y);
        """
        # CUDA kernel launched on the stream obtained from the tvm_ffi
        # environment (TVMFFIEnvGetStream), i.e. paddle's current stream.
        cuda_sources = r"""
        __global__ void AddOneKernel(float* x, float* y, int n) {
          int idx = blockIdx.x * blockDim.x + threadIdx.x;
          if (idx < n) {
            y[idx] = x[idx] + 1;
          }
        }

        void add_one_cuda(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
          // implementation of a library function
          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
          DLDataType f32_dtype{kDLFloat, 32, 1};
          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
          TVM_FFI_ICHECK(y->ndim == 1) << "y must be a 1D tensor";
          TVM_FFI_ICHECK(y->dtype == f32_dtype) << "y must be a float tensor";
          TVM_FFI_ICHECK(x->shape[0] == y->shape[0]) << "x and y must have the same shape";

          int64_t n = x->shape[0];
          int64_t nthread_per_block = 256;
          int64_t nblock = (n + nthread_per_block - 1) / nthread_per_block;
          // Obtain the current stream from the environment by calling TVMFFIEnvGetStream
          cudaStream_t stream = static_cast<cudaStream_t>(
              TVMFFIEnvGetStream(x->device.device_type, x->device.device_id));
          // launch the kernel
          AddOneKernel<<<nblock, nthread_per_block, 0, stream>>>(static_cast<float*>(x->data),
                                                                 static_cast<float*>(y->data), n);
        }
        """
        mod: Module = tvm_ffi.cpp.load_inline(
            name='mod',
            cpp_sources=cpp_sources,
            cuda_sources=cuda_sources,
            functions=['add_one_cuda'],
        )

        x = paddle.full((3,), 1.0, dtype='float32').cuda()
        y = paddle.zeros((3,), dtype='float32').cuda()
        mod.add_one_cuda(x, y)
        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])

    def test_c_dlpack_exchange_api_alloc_tensor(self):
        if platform.system() == "Windows":
            # Temporary skip this test case on windows because return owned tensor created by
            # TVMFFIEnvGetTensorAllocator will cause double free error
            self.skipTest(
                'owned tensor from TVMFFIEnvGetTensorAllocator double-frees on Windows'
            )
        # Kernel that allocates its output through the environment's tensor
        # allocator (TVMFFIEnvGetTensorAllocator) and returns an owned tensor.
        cpp_source = r"""
        inline tvm::ffi::Tensor alloc_tensor(tvm::ffi::Shape shape, DLDataType dtype, DLDevice device) {
          return tvm::ffi::Tensor::FromDLPackAlloc(TVMFFIEnvGetTensorAllocator(), shape, dtype, device);
        }

        tvm::ffi::Tensor add_one_cpu(tvm::ffi::TensorView x) {
          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
          DLDataType f32_dtype{kDLFloat, 32, 1};
          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
          tvm::ffi::Shape x_shape(x->shape, x->shape + x->ndim);
          tvm::ffi::Tensor y = alloc_tensor(x_shape, f32_dtype, x->device);
          for (int i = 0; i < x->shape[0]; ++i) {
            static_cast<float*>(y->data)[i] = static_cast<float*>(x->data)[i] + 1;
          }
          return y;
        }
        """
        mod: Module = tvm_ffi.cpp.load_inline(
            name='mod', cpp_sources=cpp_source, functions=['add_one_cpu']
        )
        x = paddle.full((3,), 1.0, dtype='float32').cpu()
        y = mod.add_one_cpu(x)
        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])
154+
if __name__ == '__main__':
    # Run the whole suite when this file is executed directly.
    unittest.main()