Skip to content

Commit

Permalink
use packed func macro for external codegen (apache#4710)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiics authored and alexwong committed Feb 26, 2020
1 parent 8b6aeaa commit ce5716a
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 104 deletions.
11 changes: 9 additions & 2 deletions python/tvm/_ffi/libinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,20 @@ def find_include_path(name=None, search_path=None, optional=False):
else:
tvm_include_path = [os.path.join(p, name) for p in header_path]
dlpack_include_path = []
dmlc_include_path = []
else:
tvm_include_path = [os.path.join(p, 'include') for p in header_path]
dlpack_include_path = [os.path.join(p, 'dlpack/include') for p in header_path]
dlpack_include_path = [os.path.join(p, 'dlpack/include') for p in
header_path]
dmlc_include_path = [os.path.join(p, 'dmlc-core/include') for p in
header_path]

# try to find include path
include_found = [p for p in tvm_include_path if os.path.exists(p) and os.path.isdir(p)]
include_found += [p for p in dlpack_include_path if os.path.exists(p) and os.path.isdir(p)]
include_found += [p for p in dlpack_include_path if os.path.exists(p)
and os.path.isdir(p)]
include_found += [p for p in dmlc_include_path if os.path.exists(p)
and os.path.isdir(p)]

if not include_found:
message = ('Cannot find the files.\n' +
Expand Down
5 changes: 1 addition & 4 deletions src/relay/backend/contrib/codegen_c/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,9 @@ class CSourceCodegen : public CSourceModuleCodegenBase {

runtime::Module CreateCSourceModule(const ObjectRef& ref) override {
// Create headers
code_stream_ << "#include <cstdint>\n";
code_stream_ << "#include <iostream>\n";
code_stream_ << "#include <cstdlib>\n";
code_stream_ << "#include <stdio.h>\n";
code_stream_ << "#include <cstring>\n";
code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
code_stream_ << "#include <dlpack/dlpack.h>\n";

// Append some common macros for operator definitions.
Expand Down
57 changes: 23 additions & 34 deletions src/relay/backend/contrib/codegen_c/codegen_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,63 +99,52 @@ class CodegenCBase {
* \code
*
* // An example code for the generated C function.
* extern "C" void foo(TVMValue* value, int* type_code, int nargs) {
* if (nargs != 3) {
* printf("foo expects 3 args, but received %d\n", nargs);
* return 1;
* }
*
* DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
* DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
* DLTensor* out = static_cast<DLTensor*>(value[2].v_handle);
*
* extern "C" int foo_wrapper_(DLTensor* arg0,
* DLTensor* arg1,
* DLTensor* out) {
* foo_(static_cast<float*>(arg0->data),
* static_cast<float*>(arg1->data),
* static_cast<float*>(out->data));
* return 0;
* }
*
* TVM_DLL_EXPORT_TYPED_FUNC(foo, foo_wrapper_);
*
* \endcode
*/
void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) {
// Appends the text of a generated C entry point to code_stream_. func_name is
// used as the symbol prefix and arg_cnt as the number of DLTensor* arguments.
//
// NOTE(review): this span looks like a rendered diff hunk in which removed and
// added lines are interleaved without +/- markers (the hunk header reports
// "23 additions & 34 deletions"). Confirm against the real file before reading
// the statements below as one executable sequence.

// Print signature
code_stream_ << "\n";
// Signature of the form `extern "C" int <func_name>(TVMValue*, int*, int)`.
code_stream_ << "extern \"C\" int " << func_name;
code_stream_ << "(TVMValue* value, int* type_code, int nargs) {\n";
EnterScope();
// Print guard
PrintIndents();
code_stream_ << "if (nargs != " << arg_cnt << "){\n";
// Signature of `<func_name>_wrapper_`, one `DLTensor* argN` parameter per line.
code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
for (int i = 0; i < arg_cnt - 1; i++) {
code_stream_ << "DLTensor* arg" << i << ",\n";
code_stream_ << "\t";
}
// NOTE(review): the closing `) {` is only written when arg_cnt > 0. With
// arg_cnt == 0 the parameter list opened above is never closed, so the emitted
// text would not be a complete function header. Confirm that callers always
// pass at least one argument.
if (arg_cnt > 0) {
code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n";
}

EnterScope();
PrintIndents();
// Emits a printf reporting the expected argument count versus `nargs`,
// followed by `return 1;`.
code_stream_ << "printf(\"" << func_name << " expects " << arg_cnt
<< " arguments, but received %d\\n\", nargs);\n";
PrintIndents();
code_stream_ << "return 1;\n";
ExitScope();
PrintIndents();
code_stream_ << "}\n";

// According to TVM's calling convention, the last one is output.
// Emits one `DLTensor* argN = static_cast<DLTensor*>(value[N].v_handle);`
// line per argument.
for (int i = 0; i < arg_cnt; i++) {
PrintIndents();
code_stream_ << "DLTensor* arg" << i << " = "
<< "static_cast<DLTensor*>(value[" << i << "].v_handle);\n";
}
// Generate the call.
// Generate the internal call.
// The call targets `<func_name>_` and passes each argument's `data` pointer
// cast to float*; the float element type is hard-coded here.
PrintIndents();
code_stream_ << func_name << "_(";
for (int i = 0; i < arg_cnt - 1; i++) {
code_stream_ << "static_cast<float*>(arg" << i << "->data), ";
code_stream_ << "static_cast<float*>(arg" << i << "->data),\n";
PrintIndents();
}
// The last argument is written without a trailing separator.
if (arg_cnt > 0) {
code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)";
}
code_stream_ << ");\n\n";
code_stream_ << ");\n";
PrintIndents();
code_stream_ << "return 0;\n";
ExitScope();
code_stream_ << "}";
code_stream_ << "}\n\n";

// Generate the macro
// Emits `TVM_DLL_EXPORT_TYPED_FUNC(<func_name>, <func_name>_wrapper_);`.
code_stream_ << "TVM_DLL_EXPORT_TYPED_FUNC(" << func_name << ", "
<< func_name << "_wrapper_);\n\n";
}

/*!
Expand Down
1 change: 1 addition & 0 deletions src/relay/backend/contrib/dnnl/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase {
code_stream_ << "#include <cstdlib>\n";
code_stream_ << "#include <cstring>\n";
code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
code_stream_ << "#include <dlpack/dlpack.h>\n";
// dnnl_kernel file is saved under src/runtime/contrib/dnnl so that we don't
// expose it to ordinary users. To make export_library use it, users need to
Expand Down
111 changes: 47 additions & 64 deletions tests/python/relay/test_external_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def generate_csource_module():

code = r'''
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h>
#include <dlpack/dlpack.h>
#include <cstdint>
#include <cstring>
Expand Down Expand Up @@ -69,22 +70,17 @@ def generate_csource_module():
free(buf_1);
}
extern "C" int json_rt_1(TVMValue* value, int* type_code, int nargs) {
if (nargs != 5) {
printf("Expect 5 args, but get %d", nargs);
return 1;
}
DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
extern "C" int ccompiler_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
DLTensor* arg2, DLTensor* arg3,
DLTensor* out) {
gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
}
TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, ccompiler_wrapper_1_);
GCC_BINARY_OP_2D(gcc_0_0, *, 10, 10);
GCC_BINARY_OP_2D(gcc_0_1, -, 10, 10);
GCC_BINARY_OP_2D(gcc_0_2, +, 10, 10);
Expand All @@ -100,21 +96,17 @@ def generate_csource_module():
free(buf_1);
}
extern "C" int json_rt_0(TVMValue* value, int* type_code, int nargs) {
if (nargs != 5) {
printf("Expect 5 args, but get %d", nargs);
return 1;
}
DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
extern "C" int ccompiler_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
DLTensor* arg2, DLTensor* arg3,
DLTensor* out) {
gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
}
TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, ccompiler_wrapper_0_);
'''
csource_module = _tvm_module.csource_module_create(code, "cc")
return csource_module
Expand All @@ -128,81 +120,72 @@ def generate_engine_module():

code = r'''
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h>
#include <dlpack/dlpack.h>
#include "gcc_engine.h"
#include "json_engine.h"
extern "C" void gcc_1_(float* gcc_input4, float* gcc_input5,
float* gcc_input6, float* gcc_input7, float* out) {
extern "C" void json_1_(float* json_input4, float* json_input5,
float* json_input6, float* json_input7, float* out) {
std::string graph =
"add_2d,10,10\n"
"sub_2d,10,10\n"
"mul_2d,10,10\n";
Engine engine;
engine.run(graph, {gcc_input4, gcc_input5, gcc_input6, gcc_input7}, out);
engine.run(graph, {json_input4, json_input5, json_input6, json_input7}, out);
}
extern "C" int json_rt_1(TVMValue* value, int* type_code, int nargs) {
if (nargs != 5) {
printf("Expect 5 args, but get %d", nargs);
return 1;
}
DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
extern "C" int json_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
DLTensor* arg2, DLTensor* arg3,
DLTensor* out) {
json_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
}
extern "C" void gcc_0_(float* gcc_input0, float* gcc_input1,
float* gcc_input2, float* gcc_input3, float* out) {
TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, json_wrapper_1_);
extern "C" void json_0_(float* json_input0, float* json_input1,
float* json_input2, float* json_input3, float* out) {
std::string graph =
"add_2d,10,10\n"
"sub_2d,10,10\n"
"mul_2d,10,10\n";
Engine engine;
engine.run(graph, {gcc_input0, gcc_input1, gcc_input2, gcc_input3}, out);
engine.run(graph, {json_input0, json_input1, json_input2, json_input3}, out);
}
extern "C" int json_rt_0(TVMValue* value, int* type_code, int nargs) {
if (nargs != 5) {
printf("Expect 5 args, but get %d", nargs);
return 1;
}
DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
extern "C" int json_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
DLTensor* arg2, DLTensor* arg3,
DLTensor* out) {
json_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
static_cast<float*>(out->data));
return 0;
}
TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, json_wrapper_0_);
'''

gen_gcc_engine()
gen_json_engine()
csource_module = _tvm_module.csource_module_create(code, "cc")
return csource_module


def gen_gcc_engine():
def gen_json_engine():
"""An example of external backend runtime engine. This is supposed to be provided
by third-party vendors and included when building the generated external kernel code.
"""

code = r'''
#ifndef _GCC_ENGINE_H_
#define _GCC_ENGINE_H_
#ifndef _JSON_ENGINE_H_
#define _JSON_ENGINE_H_
#include <cstdint>
#include <string>
#include <sstream>
Expand Down Expand Up @@ -298,9 +281,9 @@ class Engine {
std::vector<float*> buffers;
};
#endif
#endif // _JSON_ENGINE_H_
'''
header_file = tmp_path.relpath("gcc_engine.h")
header_file = tmp_path.relpath("json_engine.h")
with open(header_file, 'w') as f:
f.write(code)

Expand Down

0 comments on commit ce5716a

Please sign in to comment.