From f84491f7923a074931b614181c7aa74435590b27 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Tue, 17 Jan 2017 21:37:21 -0800
Subject: [PATCH] [TESTCASE] Add a mock test workflow of CUDA codegen

---
 python/tvm/function.py            | 27 ++++++++++--------
 python/tvm/schedule.py            |  2 --
 src/schedule/bound.cc             |  3 +-
 tests/python/test_codegen_cuda.py | 46 +++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 14 deletions(-)
 create mode 100644 tests/python/test_codegen_cuda.py

diff --git a/python/tvm/function.py b/python/tvm/function.py
index 72ec0d2680de..72929da80188 100644
--- a/python/tvm/function.py
+++ b/python/tvm/function.py
@@ -117,11 +117,13 @@ def compute(shape, fcompute, name="compute"):
         The created tensor
     """
     shape = (shape,) if isinstance(shape, _expr.Expr) else shape
-    ndim = len(shape)
     arg_names = fcompute.__code__.co_varnames
+
+    if fcompute.__code__.co_argcount == 0 and len(arg_names) == 1:
+        arg_names = ["i%d" % i for i in range(ndim)]
     if ndim != len(arg_names):
-        raise ValueError("fcompute do not match dimension")
+        raise ValueError("fcompute do not match dimension, ndim=%d" % ndim)
     dim_var = [IterVar((0, s), x) for x, s in zip(arg_names, shape)]
     body = fcompute(*[v.var for v in dim_var])
@@ -170,7 +172,7 @@ def Buffer(shape, dtype=None,
         name, ptr, shape, strides, dtype)


-def IterVar(dom, name='iter', thread_tag=''):
+def IterVar(dom=None, name=None, thread_tag=''):
     """Create a iteration variable

     Parameters
@@ -189,14 +191,17 @@
     iter_var : IterVar
         The result itervar
     """
-    if isinstance(dom, (list, tuple)):
-        if len(dom) != 2:
-            raise ValueError("need to list of ranges")
-        dom = Range(dom[0], dom[1])
-
-    if not isinstance(dom, _collections.Range):
-        raise ValueError("dom need to be Range")
-
+    if dom is not None:
+        if isinstance(dom, (list, tuple)):
+            if len(dom) != 2:
+                raise ValueError("need to list of ranges")
+            dom = Range(dom[0], dom[1])
+
+        if not isinstance(dom, _collections.Range):
+            raise ValueError("dom need to be Range")
+    if name is None:
+        name = thread_tag if thread_tag else name
+    name = name if name else 'iter'
     return _function_internal._IterVar(dom, name, thread_tag)
diff --git a/python/tvm/schedule.py b/python/tvm/schedule.py
index b46c5866082f..b276c90a14a6 100644
--- a/python/tvm/schedule.py
+++ b/python/tvm/schedule.py
@@ -56,8 +56,6 @@ def split(self, parent, factor=None, outer=None):
         if outer is not None:
             if outer.thread_tag == '':
                 raise ValueError("split by outer must have special thread_tag")
-            if outer.dom is None:
-                raise ValueError("split by outer must have specified domain")
             inner = _function_internal._StageSplitByOuter(self, parent, outer, factor)
         else:
             if factor is None:
diff --git a/src/schedule/bound.cc b/src/schedule/bound.cc
index 6a23f48d8c90..d4ce520c9229 100644
--- a/src/schedule/bound.cc
+++ b/src/schedule/bound.cc
@@ -5,6 +5,7 @@
  */
 #include <tvm/ir.h>
 #include <tvm/ir_visitor.h>
+#include <tvm/ir_pass.h>
 #include <tvm/schedule_pass.h>
 #include "./int_set.h"
 #include "./graph.h"
@@ -14,7 +15,7 @@ namespace schedule {

 // result = ceil((a / b)), both a and b are positive integer
 inline Expr DivCeil(Expr a, Expr b) {
-  return (a + b - 1) / b;
+  return ir::Simplify((a + b - 1) / b);
 }

 // Downward message passing algorithm on stage schedule s,
diff --git a/tests/python/test_codegen_cuda.py b/tests/python/test_codegen_cuda.py
new file mode 100644
index 000000000000..b93e80e52059
--- /dev/null
+++ b/tests/python/test_codegen_cuda.py
@@ -0,0 +1,46 @@
+import tvm
+import numpy
+
+def mock_test_add():
+    """Not yet working, mock design"""
+    n = tvm.Var('n')
+    A = tvm.placeholder((n,), name='A')
+    B = tvm.placeholder((n,), name='B')
+    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+    s = tvm.Schedule(C.op)
+
+    # GPU schedule have to split by gridIdx and threadIdx
+    num_thread = 256
+    grid_x = tvm.IterVar(thread_tag="gridIdx.x")
+    thread_x = tvm.IterVar((0, num_thread), thread_tag="threadIdx.x")
+    _, x = s[C].split(C.op.axis[0], factor=num_thread, outer=grid_x)
+    _, x = s[C].split(x, outer=thread_x)
+    # compile to IR
+    bounds = tvm.schedule.InferBound(s)
+    stmt = tvm.ir_pass.ScheduleOps(s, bounds)
+
+    Ab = tvm.Buffer(A.shape, A.dtype, name='A')
+    Bb = tvm.Buffer(B.shape, B.dtype, name='B')
+    Cb = tvm.Buffer(C.shape, C.dtype, name='C')
+
+    def codegen():
+        # generate host/device code
+        host_code, device_code = tvm.codegen.GenCUDA(
+            s,
+            inputs={A: Ab, B:Bb},
+            outputs={C: Cb},
+            args=[A, B, C])
+        # generate a function based on the code
+        f = tvm.cuda.build_function(host_code, device_code)
+        # create arrays
+        a = tvm.nd.array(np.ones(10), ctx=tvm.gpu(0))
+        b = tvm.nd.array(np.ones(10), ctx=tvm.gpu(0))
+        c = tvm.nd.array(np.zeros(10), ctx=tvm.gpu(0))
+        # calll the generated code
+        f(a, b, c)
+        # sync the result
+        np.testing.assert_equal(c.asnumpy(), np.ones(10) * 2)
+
+
+if __name__ == "__main__":
+    mock_test_add()