From 936323bbed52308706b9606f15adf88597a28d20 Mon Sep 17 00:00:00 2001
From: Pratyush Patel
Date: Wed, 28 Nov 2018 10:32:31 -0800
Subject: [PATCH] [BACKEND][CODEGEN] C codegen with tests (#2161)

* Implement C code generation with tests
* Code cleanup
* Implement C code generation with tests
* Code cleanup
* tabs to spaces
* make lint compliant
* update export_library and reserve unique C keywords
* move ReserveKeywordsAsUnique to codegen_c
* some documentation and code cleanup
* use tvm.contrib.util for tempdir in testcases
---
 python/tvm/_ffi/libinfo.py                   |  69 +++++
 python/tvm/_ffi/runtime_ctypes.py            |   1 +
 python/tvm/contrib/cc.py                     |   2 +
 python/tvm/module.py                         |  12 +-
 src/codegen/codegen_c.cc                     |  44 +++-
 src/codegen/codegen_c.h                      |   2 +
 src/codegen/codegen_c_host.cc                | 252 +++++++++++++++++++
 src/codegen/codegen_c_host.h                 |  40 +++
 src/codegen/codegen_source_base.h            |   7 +
 src/codegen/source_module.cc                 |  46 ++++
 tests/python/unittest/test_codegen_c_host.py |  87 +++++++
 11 files changed, 553 insertions(+), 9 deletions(-)
 create mode 100644 src/codegen/codegen_c_host.cc
 create mode 100644 src/codegen/codegen_c_host.h
 create mode 100644 tests/python/unittest/test_codegen_c_host.py

diff --git a/python/tvm/_ffi/libinfo.py b/python/tvm/_ffi/libinfo.py
index f911829d38b17..2fdf5aeb132ac 100644
--- a/python/tvm/_ffi/libinfo.py
+++ b/python/tvm/_ffi/libinfo.py
@@ -99,6 +99,75 @@ def find_lib_path(name=None, search_path=None, optional=False):
     return lib_found
 
 
+def find_include_path(name=None, search_path=None, optional=False):
+    """Find header files for C compilation.
+
+    Parameters
+    ----------
+    name : str or list of str, optional
+        Directory name(s) to look for under each search root. When
+        omitted, ``include`` and ``dlpack/include`` are searched.
+
+    search_path : str or list of str, optional
+        Additional search root(s), tried after the default roots.
+
+    optional : bool, optional
+        When False, raise RuntimeError if no directory is found;
+        when True, return None instead.
+
+    Returns
+    -------
+    include_path : list(string)
+        List of all found paths to header files, or None when nothing
+        was found and ``optional`` is True.
+    """
+    ffi_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+    source_dir = os.path.join(ffi_dir, "..", "..", "..")
+    install_include_dir = os.path.join(ffi_dir, "..", "..", "..", "..")
+    third_party_dir = os.path.join(source_dir, "3rdparty")
+
+    header_path = []
+
+    if os.environ.get('TVM_INCLUDE_PATH', None):
+        header_path.append(os.environ['TVM_INCLUDE_PATH'])
+
+    header_path.append(install_include_dir)
+    header_path.append(source_dir)
+    header_path.append(third_party_dir)
+
+    header_path = [os.path.abspath(x) for x in header_path]
+    if search_path is not None:
+        if isinstance(search_path, list):
+            header_path = header_path + search_path
+        else:
+            header_path.append(search_path)
+    if name is not None:
+        if isinstance(name, list):
+            tvm_include_path = []
+            for n in name:
+                tvm_include_path += [os.path.join(p, n) for p in header_path]
+        else:
+            tvm_include_path = [os.path.join(p, name) for p in header_path]
+        dlpack_include_path = []
+    else:
+        tvm_include_path = [os.path.join(p, 'include') for p in header_path]
+        dlpack_include_path = [os.path.join(p, 'dlpack/include') for p in header_path]
+
+    # try to find include path
+    include_found = [p for p in tvm_include_path if os.path.exists(p) and os.path.isdir(p)]
+    include_found += [p for p in dlpack_include_path if os.path.exists(p) and os.path.isdir(p)]
+
+    if not include_found:
+        message = ('Cannot find the files.\n' +
+                   'List of candidates:\n' +
+                   str('\n'.join(tvm_include_path + dlpack_include_path)))
+        if not optional:
+            raise RuntimeError(message)
+        return None
+
+    return include_found
+
+
 # current version
 # We use the version of the incoming release for code
 # that is under development.
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index b17487559e504..ef5316b5e2677 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -118,6 +118,7 @@ class TVMContext(ctypes.Structure):
         'llvm': 1,
         'stackvm': 1,
         'cpu': 1,
+        'c': 1,
         'gpu': 2,
         'cuda': 2,
         'nvptx': 2,
diff --git a/python/tvm/contrib/cc.py b/python/tvm/contrib/cc.py
index 0ffa6c420243c..0361f594de6ab 100644
--- a/python/tvm/contrib/cc.py
+++ b/python/tvm/contrib/cc.py
@@ -7,6 +7,7 @@
 
 from .._ffi.base import py_str
 from .util import tempdir
+from .._ffi.libinfo import find_include_path
 
 
 def create_shared(output,
@@ -49,6 +50,7 @@ def _linux_shared(output, objects, options, cc="g++"):
     cmd += objects
     if options:
         cmd += options
+    cmd += ["-I" + path for path in find_include_path()]
     proc = subprocess.Popen(
         cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (out, _) = proc.communicate()
diff --git a/python/tvm/module.py b/python/tvm/module.py
index c5b5ac62409d9..e1f0b527a5016 100644
--- a/python/tvm/module.py
+++ b/python/tvm/module.py
@@ -115,17 +115,21 @@ def export_library(self,
             self.save(file_name)
             return
 
-        if self.type_key != "llvm":
-            raise ValueError("Module[%s]: Only llvm support export shared" % self.type_key)
+        if self.type_key not in ("llvm", "c"):
+            raise ValueError("Module[%s]: Only llvm and c support export shared" % self.type_key)
         temp = _util.tempdir()
         if fcompile is not None and hasattr(fcompile, "object_format"):
             object_format = fcompile.object_format
         else:
-            object_format = "o"
+            if self.type_key == "llvm":
+                object_format = "o"
+            else:
+                assert self.type_key == "c"
+                object_format = "cc"
         path_obj = temp.relpath("lib." + object_format)
         self.save(path_obj)
         files = [path_obj]
-        is_system_lib = self.get_function("__tvm_is_system_module")()
+        is_system_lib = self.type_key == "llvm" and self.get_function("__tvm_is_system_module")()
         if self.imported_modules:
             path_cc = temp.relpath("devc.cc")
             with open(path_cc, "w") as f:
diff --git a/src/codegen/codegen_c.cc b/src/codegen/codegen_c.cc
index d902437dd9902..3624dc0403aa0 100644
--- a/src/codegen/codegen_c.cc
+++ b/src/codegen/codegen_c.cc
@@ -22,12 +22,43 @@ void CodeGenC::InitFuncState(LoweredFunc f) {
   handle_data_type_.clear();
   CodeGenSourceBase::ClearFuncState();
 }
-void CodeGenC::AddFunction(LoweredFunc f) {
-  // clear previous generated state.
-  this->InitFuncState(f);
+
+void CodeGenC::ReserveKeywordsAsUnique() {
   // skip the first underscore, so SSA variable starts from _1
   GetUniqueName("_");
   GetUniqueName("extern");
+  GetUniqueName("void");
+  GetUniqueName("int");
+  GetUniqueName("float");
+  GetUniqueName("double");
+  GetUniqueName("char");
+  GetUniqueName("unsigned");
+  GetUniqueName("short");
+  GetUniqueName("long");
+  GetUniqueName("if");
+  GetUniqueName("else");
+  GetUniqueName("switch");
+  GetUniqueName("case");
+  GetUniqueName("default");
+  GetUniqueName("for");
+  GetUniqueName("do");
+  GetUniqueName("while");
+  GetUniqueName("goto");
+  GetUniqueName("register");
+  GetUniqueName("continue");
+  GetUniqueName("break");
+  GetUniqueName("typedef");
+  GetUniqueName("struct");
+  GetUniqueName("enum");
+  GetUniqueName("union");
+  GetUniqueName("return");
+}
+
+void CodeGenC::AddFunction(LoweredFunc f) {
+  // clear previous generated state.
+  this->InitFuncState(f);
+  // reserve keywords
+  ReserveKeywordsAsUnique();
   // add to alloc buffer type.
   for (const auto & kv : f->handle_data_type) {
     RegisterHandleType(kv.first.get(), kv.second.type());
@@ -187,6 +218,7 @@ std::string CodeGenC::GetStructRef(
       case intrinsic::kArrNDim: os << "ndim"; break;
       case intrinsic::kArrTypeCode: os << "dtype.code"; break;
       case intrinsic::kArrTypeBits: os << "dtype.bits"; break;
+      case intrinsic::kArrByteOffset: os << "byte_offset"; break;
       case intrinsic::kArrTypeLanes: os << "dtype.lanes"; break;
       case intrinsic::kArrDeviceId: os << "ctx.device_id"; break;
       case intrinsic::kArrDeviceType: os << "ctx.device_type"; break;
@@ -834,8 +866,10 @@ void CodeGenC::VisitStmt_(const Evaluate *op) {
     }
   }
   std::string vid = this->PrintExpr(op->value);
-  this->PrintIndent();
-  this->stream << "(void)" << vid << ";\n";
+  if (!vid.empty()) {
+    this->PrintIndent();
+    this->stream << "(void)" << vid << ";\n";
+  }
 }
 
 void CodeGenC::VisitStmt_(const ProducerConsumer *op) {
diff --git a/src/codegen/codegen_c.h b/src/codegen/codegen_c.h
index b36e37da54fef..c9af24a04a3cc 100644
--- a/src/codegen/codegen_c.h
+++ b/src/codegen/codegen_c.h
@@ -183,6 +183,8 @@ class CodeGenC :
   std::unordered_map<const Variable*, std::string> alloc_storage_scope_;
   /*! \brief the data type of allocated buffers */
   std::unordered_map<const Variable*, Type> handle_data_type_;
+  /*! \brief reserves common C keywords */
+  void ReserveKeywordsAsUnique();
 
  private:
   /*! \brief whether to print in SSA form */
diff --git a/src/codegen/codegen_c_host.cc b/src/codegen/codegen_c_host.cc
new file mode 100644
index 0000000000000..248354dbc339b
--- /dev/null
+++ b/src/codegen/codegen_c_host.cc
@@ -0,0 +1,252 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file codegen_c_host.cc
+ */
+#include <tvm/packed_func_ext.h>
+#include <vector>
+#include <string>
+#include "codegen_c_host.h"
+#include "build_common.h"
+
+namespace tvm {
+namespace codegen {
+
+CodeGenCHost::CodeGenCHost() {
+  module_name = GetUniqueName("__tvm_module_ctx");
+}
+
+void CodeGenCHost::Init(bool output_ssa) {
+  decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n";
+  decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n";
+  decl_stream << "void* " << module_name << " = NULL;\n";
+  CodeGenC::Init(output_ssa);
+}
+
+void CodeGenCHost::AddFunction(LoweredFunc f) {
+  // clear previous generated state.
+  this->InitFuncState(f);
+  // reserve keywords
+  ReserveKeywordsAsUnique();
+  // add to alloc buffer type.
+  for (const auto & kv : f->handle_data_type) {
+    RegisterHandleType(kv.first.get(), kv.second.type());
+  }
+
+  this->stream << "#ifdef __cplusplus\n";
+  this->stream << "extern \"C\"\n";
+  this->stream << "#endif\n";
+  this->stream << "TVM_DLL int32_t " << f->name << "(";
+  for (size_t i = 0; i < f->args.size(); ++i) {
+    Var v = f->args[i];
+    std::string vid = AllocVarID(v.get());
+    if (i != 0) stream << ", ";
+    if (v.type().is_handle()) {
+      auto it = alloc_storage_scope_.find(v.get());
+      if (it != alloc_storage_scope_.end()) {
+        PrintStorageScope(it->second, stream);
+      }
+      stream << ' ';
+
+      if (handle_data_type_.count(v.get())) {
+        PrintType(handle_data_type_.at(v.get()), stream);
+      } else {
+        stream << "void";
+      }
+      stream << "*";
+
+      if (f->is_restricted && restrict_keyword_.length() != 0) {
+        stream << ' ' << restrict_keyword_;
+      }
+    } else {
+      PrintType(v.type(), stream);
+    }
+    stream << ' ' << vid;
+  }
+  stream << ") {\n";
+  this->PreFunctionBody(f);
+  int func_scope = this->BeginScope();
+  this->PrintStmt(f->body);
+  this->PrintIndent();
+  this->stream << "return 0;\n";
+  this->EndScope(func_scope);
+  this->PrintIndent();
+  this->stream << "}\n\n";
+}
+
+std::string CodeGenCHost::Finish() {
+  return CodeGenC::Finish();
+}
+
+void CodeGenCHost::PrintType(Type t, std::ostream& os) {  // NOLINT(*)
+  int lanes = t.lanes();
+  if (t.is_handle()) {
+    CHECK_EQ(lanes, 1)
+        << "does not support vector types";
+    os << "void*"; return;
+  }
+  if (t == Bool()) {
+    os << "bool"; return;
+  }
+  bool fail = false;
+  if (t.is_float()) {
+    switch (t.bits()) {
+      case 16:
+        os << "half";
+        break;
+      case 32: os << "float"; break;
+      case 64:
+        os << "double";
+        break;
+      default: fail = true; break;
+    }
+    if (!fail && lanes == 1) return;
+    if (!fail && (lanes >= 2 && lanes <= 16)) {
+      os << lanes; return;
+    }
+  } else if (t.is_uint() || t.is_int()) {
+    if (t.is_uint()) {
+      os << 'u';
+    }
+    switch (t.bits()) {
+      case 8: os << "int8_t"; break;
+      case 16: os << "int16_t"; break;
+      case 32: os << "int32_t"; break;
+      case 64: os << "int64_t"; break;
+      case 1: os << "int32_t"; break;
+      default: fail = true; break;
+    }
+    if (!fail && lanes == 1) return;
+    if (!fail && (lanes >= 2 && lanes <= 16)) {
+      os << lanes; return;
+    }
+  }
+  LOG(FATAL) << "Cannot convert type " << t << " to C type";
+}
+
+void CodeGenCHost::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
+  std::string v = PrintExpr(op->value);
+  os << "((";
+  PrintType(op->type, os);
+  os << ")(";
+  for (int i = 0; i < op->lanes; ++i) {
+    if (i != 0) os << ", ";
+    os << v;
+  }
+  os << "))";
+}
+
+void CodeGenCHost::PrintGetFuncFromBackend(std::string func_name, std::string packed_func_name) {
+  this->PrintIndent();
+  this->stream << "if (" << packed_func_name << " == NULL) {\n";
+  int packed_func_if_scope = this->BeginScope();
+  this->PrintIndent();
+  this->stream << "if (TVMBackendGetFuncFromEnv(" << module_name
+              << ", \"" << func_name << "\""
+              << ", &" << packed_func_name << ") != 0) {\n";
+  int get_func_env_scope = this->BeginScope();
+  this->PrintIndent();
+  this->stream << "return -1;\n";
+  this->EndScope(get_func_env_scope);
+  this->PrintIndent();
+  this->stream << "}\n";
+  this->EndScope(packed_func_if_scope);
+  this->PrintIndent();
+  this->stream << "}\n";
+}
+
+void CodeGenCHost::PrintFuncCall(std::string packed_func_name, int num_args) {
+  this->PrintIndent();
+  std::string ret_val = GetUniqueName("ret_val");
+  std::string ret_type_code = GetUniqueName("ret_type_code");
+  this->stream << "TVMValue " << ret_val << ";\n";
+  this->PrintIndent();
+  this->stream << "int " << ret_type_code << ";\n";
+  this->PrintIndent();
+  this->stream << "if (TVMFuncCall(" << packed_func_name << ", "
+               << "(TVMValue*) stack_value" << ", " << "(int*) stack_tcode" << ", "
+               << num_args << ", " << "&" << ret_val << ", " << "&"
+               << ret_type_code << ") != 0) {\n";
+  int func_call_scope = this->BeginScope();
+  this->PrintIndent();
+  this->stream << "return -1;\n";
+  this->EndScope(func_call_scope);
+  this->PrintIndent();
+  this->stream << "}\n";
+}
+
+void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
+  if (op->is_intrinsic(intrinsic::tvm_stack_alloca)) {
+    std::string stack_name = GetUniqueName("stack");
+    const std::string& type = op->args[0].as<StringImm>()->value;
+    const IntImm* num = op->args[1].as<IntImm>();
+    CHECK(num != nullptr);
+    static_assert(alignof(TVMValue) % alignof(TVMArray) == 0, "invariant");
+    size_t unit = sizeof(TVMValue);
+    size_t size = 0;
+    if (type == "shape") {
+      size = (num->value * sizeof(tvm_index_t) + unit - 1) / unit;
+    } else if (type == "arg_value") {
+      size = (num->value * sizeof(TVMValue) + unit - 1) / unit;
+    } else if (type == "arg_tcode") {
+      size = (num->value * sizeof(int) + unit - 1) / unit;
+    } else if (type == "array") {
+      size = (num->value * sizeof(TVMArray) + unit - 1) / unit;
+    } else {
+      LOG(FATAL) << "Unknown stack alloca type " << type;
+    }
+    this->PrintIndent();
+    this->stream << "TVMValue " << stack_name << "[" << size << "];\n";
+    os << stack_name;
+  } else if (op->is_intrinsic(intrinsic::tvm_call_packed_lowered)) {
+    const StringImm* s = op->args[0].as<StringImm>();
+    CHECK(s != nullptr) << "tvm_call_packed_lowered expects first argument as function name";
+    int64_t begin = op->args[3].as<IntImm>()->value;
+    int64_t end = op->args[4].as<IntImm>()->value;
+    int64_t num_args = end - begin;
+    CHECK_GE(num_args, 0);
+    std::string func_name = s->value;
+    std::string packed_func_name = GetUniqueName(func_name + "_packed");
+    decl_stream << "static void* " << packed_func_name << " = NULL;\n";
+    this->PrintGetFuncFromBackend(func_name, packed_func_name);
+    this->PrintFuncCall(packed_func_name, num_args);
+  } else if (op->is_intrinsic(intrinsic::tvm_throw_last_error)) {
+    this->PrintIndent();
+    this->stream << "return -1;\n";
+  } else {
+    CodeGenC::VisitExpr_(op, os);
+  }
+}
+
+void CodeGenCHost::VisitStmt_(const AssertStmt *op) {  // NOLINT(*)
+  std::string cond = PrintExpr(op->condition);
+  PrintIndent();
+  stream << "if (!(" << cond << ")) {\n";
+  int assert_if_scope = this->BeginScope();
+  PrintIndent();
+  stream << "TVMAPISetLastError(\"" << op->message.as<StringImm>()->value << "\");\n";
+  PrintIndent();
+  stream << "return -1;\n";
+  this->EndScope(assert_if_scope);
+  PrintIndent();
+  stream << "}\n";
+  this->PrintStmt(op->body);
+}
+
+runtime::Module BuildCHost(Array<LoweredFunc> funcs) {
+  using tvm::runtime::Registry;
+  bool output_ssa = false;
+  CodeGenCHost cg;
+  cg.Init(output_ssa);
+  for (LoweredFunc f : funcs) {
+    cg.AddFunction(f);
+  }
+  std::string code = cg.Finish();
+  return CSourceModuleCreate(code, "c");
+}
+
+TVM_REGISTER_API("codegen.build_c")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    *rv = BuildCHost(args[0]);
+  });
+}  // namespace codegen
+}  // namespace tvm
diff --git a/src/codegen/codegen_c_host.h b/src/codegen/codegen_c_host.h
new file mode 100644
index 0000000000000..eb47a7829e2cd
--- /dev/null
+++ b/src/codegen/codegen_c_host.h
@@ -0,0 +1,40 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file codegen_c_host.h
+ * \brief Generate C host code.
+ */
+#ifndef TVM_CODEGEN_CODEGEN_C_HOST_H_
+#define TVM_CODEGEN_CODEGEN_C_HOST_H_
+
+#include <tvm/codegen.h>
+#include <tvm/packed_func_ext.h>
+#include <string>
+#include "codegen_c.h"
+
+namespace tvm {
+namespace codegen {
+
+class CodeGenCHost final : public CodeGenC {
+ public:
+  CodeGenCHost();
+  void Init(bool output_ssa);
+  void AddFunction(LoweredFunc f);
+  std::string Finish();
+
+  void PrintType(Type t, std::ostream& os) final;  // NOLINT(*)
+
+  // overload visitor functions
+  void VisitExpr_(const Broadcast* op, std::ostream& os) final;  // NOLINT(*)
+  void VisitExpr_(const Call *op, std::ostream& os) final;  // NOLINT(*)
+  void VisitStmt_(const AssertStmt *op) final;  // NOLINT(*)
+
+ private:
+  std::string module_name;
+  void PrintGetFuncFromBackend(std::string func_name, std::string packed_func_name);
+  void PrintFuncCall(std::string packed_func_name, int num_args);
+};
+
+}  // namespace codegen
+}  // namespace tvm
+
+#endif  // TVM_CODEGEN_CODEGEN_C_HOST_H_
diff --git a/src/codegen/codegen_source_base.h b/src/codegen/codegen_source_base.h
index d2f80a538a337..3fc46c35c7f70 100644
--- a/src/codegen/codegen_source_base.h
+++ b/src/codegen/codegen_source_base.h
@@ -112,6 +112,13 @@ class CodeGenSourceBase {
  */
 runtime::Module SourceModuleCreate(std::string code, std::string fmt);
 
+/*!
+ * \brief Create a C source module for viewing and compiling GCC code.
+ * \param code The code to be viewed.
+ * \param fmt The code format.
+ */
+runtime::Module CSourceModuleCreate(std::string code, std::string fmt);
+
 /*!
  * \brief Create a source module for viewing and limited saving for device.
  * \param data The code data to be viewed.
diff --git a/src/codegen/source_module.cc b/src/codegen/source_module.cc
index c7100e18735ed..56facea1567f2 100644
--- a/src/codegen/source_module.cc
+++ b/src/codegen/source_module.cc
@@ -53,6 +53,52 @@ runtime::Module SourceModuleCreate(std::string code, std::string fmt) {
   return runtime::Module(n);
 }
 
+// Module holding generated C source: it can be viewed and saved, but not executed.
+class CSourceModuleNode : public runtime::ModuleNode {
+ public:
+  CSourceModuleNode(std::string code,
+                    std::string fmt)
+      : code_(code), fmt_(fmt) {}
+  const char* type_key() const {
+    return "c";
+  }
+
+  PackedFunc GetFunction(
+      const std::string& name,
+      const std::shared_ptr<ModuleNode>& sptr_to_self) final {
+    LOG(FATAL) << "C Source module cannot execute, to get executable module"
+               << " build TVM with \'" << fmt_ << "\' runtime support";
+    return PackedFunc();
+  }
+
+  std::string GetSource(const std::string& format) final {
+    return code_;
+  }
+
+  void SaveToFile(const std::string& file_name,
+                  const std::string& format) final {
+    std::string fmt = GetFileFormat(file_name, format);
+    std::string meta_file = GetMetaFilePath(file_name);
+    if (fmt == "cc") {
+      CHECK_NE(code_.length(), 0);
+      SaveBinaryToFile(file_name, code_);
+    } else {
+      CHECK_EQ(fmt, fmt_)
+          << "Can only save to format=" << fmt_;
+    }
+  }
+
+ protected:
+  std::string code_;
+  std::string fmt_;
+};
+
+runtime::Module CSourceModuleCreate(std::string code, std::string fmt) {
+  std::shared_ptr<CSourceModuleNode> n =
+      std::make_shared<CSourceModuleNode>(code, fmt);
+  return runtime::Module(n);
+}
+
 // supports limited save without cross compile
 class DeviceSourceModuleNode final : public runtime::ModuleNode {
  public:
diff --git a/tests/python/unittest/test_codegen_c_host.py b/tests/python/unittest/test_codegen_c_host.py
new file mode 100644
index 0000000000000..00acbeb88fcf7
--- /dev/null
+++ b/tests/python/unittest/test_codegen_c_host.py
@@ -0,0 +1,87 @@
+import tvm
+import numpy as np
+from tvm.contrib import util
+
+def test_add():
+    nn = 1024
+    n = tvm.convert(nn)
+    A = tvm.placeholder((n,), name='A')
+    B = tvm.placeholder((n,), name='B')
+    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+    s = tvm.create_schedule(C.op)
+
+    def check_c():
+        f1 = tvm.lower(s, [A, B, C], name="fadd")
+        fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
+        fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+        mhost = tvm.codegen.build_module(fsplits[0], "c")
+        temp = util.tempdir()
+        path_dso = temp.relpath("temp.so")
+        mhost.export_library(path_dso)
+        m = tvm.module.load(path_dso)
+        fadd = m['fadd']
+        ctx = tvm.cpu(0)
+        # launch the kernel.
+        n = nn
+        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
+        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
+        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
+        fadd(a, b, c)
+        tvm.testing.assert_allclose(
+            c.asnumpy(), a.asnumpy() + b.asnumpy())
+    check_c()
+
+def test_add_pipeline():
+    nn = 1024
+    n = tvm.convert(nn)
+    A = tvm.placeholder((n,), name='A')
+    B = tvm.placeholder((n,), name='B')
+    AA = tvm.compute((n,), lambda *i: A(*i), name='A')
+    BB = tvm.compute((n,), lambda *i: B(*i), name='B')
+    T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
+    C = tvm.compute(A.shape, lambda *i: T(*i), name='C')
+    s = tvm.create_schedule(C.op)
+    xo, xi = s[C].split(C.op.axis[0], factor=4)
+    xo1, xo2 = s[C].split(xo, factor=13)
+    s[C].parallel(xo2)
+    s[C].pragma(xo1, "parallel_launch_point")
+    s[C].pragma(xo2, "parallel_stride_pattern")
+    s[C].pragma(xo2, "parallel_barrier_when_finish")
+    s[C].vectorize(xi)
+
+    def check_c():
+        if not tvm.module.enabled("llvm"):
+            return
+        # Specifically allow offset to test codepath when offset is available
+        Ab = tvm.decl_buffer(
+            A.shape, A.dtype,
+            elem_offset=tvm.var('Aoffset'),
+            offset_factor=8,
+            name='A')
+        binds = {A : Ab}
+        # BUILD and invoke the kernel.
+        f1 = tvm.lower(s, [A,B,C], name="fadd_pipeline")
+        fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
+        fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+        mhost = tvm.codegen.build_module(fsplits[0], "c")
+        temp = util.tempdir()
+        path_dso = temp.relpath("temp.so")
+        mhost.export_library(path_dso)
+        m = tvm.module.load(path_dso)
+        fadd = m["fadd_pipeline"]
+        ctx = tvm.cpu(0)
+        # launch the kernel.
+        n = nn
+        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
+        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
+        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
+        fadd(a, b, c)
+        tvm.testing.assert_allclose(
+            c.asnumpy(), a.asnumpy() + b.asnumpy())
+
+    with tvm.build_config(offset_factor=4):
+        check_c()
+
+if __name__ == "__main__":
+    test_add()
+    test_add_pipeline()