diff --git a/include/tvm/ir/module.h b/include/tvm/ir/module.h index fdb44b11887c..85cb3c76134d 100644 --- a/include/tvm/ir/module.h +++ b/include/tvm/ir/module.h @@ -503,6 +503,35 @@ constexpr const char* kConstants = "constants"; */ constexpr const char* kExternalMods = "external_mods"; +/*! + * \brief A prefix for generating C symbols system lib creation. + * + * This prefix guides passes that creates global_symbol for internal functions + * that may have c linkage (e.g. TIR functions and some BYOC functions). It also affects + * the symbol of the fat bin blob during module export. + * + * This attribute is used to avoid symbol conflict when we + * generate and combine multiple system libs that get linked into one. + * + * Rationale: mechanisms like BYOC rely on the common global symbol + * and each external compiler also has its own mechanism of mangling. + * As a result, we cannot rely on other mechanisms on setting a global_symbol and then renaming, + * because the external compiler already agreed on the name. + * + * system_lib_prefix provides a way to hint at the passes to allow names to + * avoid name conflict at the beginning. + * + * Note that users can still directly specify global symbols that may conflict. + * It is up to the downstream toolchain to manage those external-facing functions. + * + * This does not affect non-C linkage functions it is less of an issue because + * they will be embedded into fatbin that in different symbols, + * The system lib loader can pick the right prefix for a given prefix. + * + * Having this attribute implies system lib generation linkage. + */ +constexpr const char* kSystemLibPrefix = "system_lib_prefix"; + /*! * \brief All the named runtime::NDArrays accumulated during compilation by external codegen. * Generally the associated runtime::Module will indicate it requires bindings for these names, diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h index 508b34b3517e..3da4945c86fd 100644 --- a/include/tvm/runtime/module.h +++ b/include/tvm/runtime/module.h @@ -278,8 +278,6 @@ constexpr const char* tvm_get_c_metadata = "get_c_metadata"; constexpr const char* tvm_module_ctx = "__tvm_module_ctx"; /*! \brief Global variable to store device module blob */ constexpr const char* tvm_dev_mblob = "__tvm_dev_mblob"; -/*! \brief Number of bytes of device module blob. */ -constexpr const char* tvm_dev_mblob_nbytes = "__tvm_dev_mblob_nbytes"; /*! \brief global function to set device */ constexpr const char* tvm_set_device = "__tvm_set_device"; /*! \brief Auxiliary counter to global barrier. */ diff --git a/include/tvm/target/codegen.h b/include/tvm/target/codegen.h index b2cab0e4bc45..46a19ad71b60 100644 --- a/include/tvm/target/codegen.h +++ b/include/tvm/target/codegen.h @@ -55,9 +55,11 @@ runtime::Module Build(IRModule mod, Target target); * * \param m The host module with the imports. * \param system_lib Whether expose as system library. + * \param c_symbol_prefix Optional symbol prefix of the blob symbol. * \return cstr The C string representation of the file. */ -std::string PackImportsToC(const runtime::Module& m, bool system_lib); +std::string PackImportsToC(const runtime::Module& m, bool system_lib, + const std::string& c_symbol_prefix = ""); /*! * \brief Pack imported device library to a LLVM module. @@ -68,10 +70,13 @@ std::string PackImportsToC(const runtime::Module& m, bool system_lib); * \param m The host module with the imports. * \param system_lib Whether expose as system library. * \param target_triple LLVM target triple + * \param c_symbol_prefix Optional symbol prefix of the blob symbol. + * * \return runtime::Module The generated LLVM module. */ runtime::Module PackImportsToLLVM(const runtime::Module& m, bool system_lib, - const std::string& target_triple); + const std::string& target_triple, + const std::string& c_symbol_prefix = ""); } // namespace codegen } // namespace tvm #endif // TVM_TARGET_CODEGEN_H_ diff --git a/python/tvm/runtime/module.py b/python/tvm/runtime/module.py index c78a6d9c3136..671e3a390e0d 100644 --- a/python/tvm/runtime/module.py +++ b/python/tvm/runtime/module.py @@ -508,6 +508,7 @@ def export_library(self, file_name, fcompile=None, addons=None, workspace_dir=No files = addons if addons else [] is_system_lib = False has_c_module = False + system_lib_prefix = None llvm_target_string = None global_object_format = "o" for index, module in enumerate(modules): @@ -549,6 +550,8 @@ def export_library(self, file_name, fcompile=None, addons=None, workspace_dir=No if module.type_key == "llvm": is_system_lib = module.get_function("__tvm_is_system_module")() llvm_target_string = module.get_function("_get_target_string")() + system_lib_prefix = module.get_function("__tvm_get_system_lib_prefix")() + if not fcompile: if file_name.endswith(".tar"): fcompile = _tar.tar @@ -564,15 +567,21 @@ def export_library(self, file_name, fcompile=None, addons=None, workspace_dir=No raise ValueError("%s need --system-lib option" % str(fcompile)) if self.imported_modules: + pack_lib_prefix = system_lib_prefix if system_lib_prefix else "" + if enabled("llvm") and llvm_target_string: - path_obj = os.path.join(workspace_dir, f"devc.{global_object_format}") - m = _ffi_api.ModulePackImportsToLLVM(self, is_system_lib, llvm_target_string) + path_obj = os.path.join( + workspace_dir, f"{pack_lib_prefix}devc.{global_object_format}" + ) + m = _ffi_api.ModulePackImportsToLLVM( + self, is_system_lib, llvm_target_string, pack_lib_prefix + ) m.save(path_obj) files.append(path_obj) else: - path_cc = os.path.join(workspace_dir, "devc.c") + path_cc = os.path.join(workspace_dir, f"{pack_lib_prefix}devc.c") with open(path_cc, "w") as f: - f.write(_ffi_api.ModulePackImportsToC(self, is_system_lib)) + f.write(_ffi_api.ModulePackImportsToC(self, is_system_lib, pack_lib_prefix)) files.append(path_cc) # The imports could contain a c module but the object format could be tar @@ -589,7 +598,7 @@ def export_library(self, file_name, fcompile=None, addons=None, workspace_dir=No return fcompile(file_name, files, **kwargs) -def system_lib(): +def system_lib(symbol_prefix=""): """Get system-wide library module singleton. System lib is a global module that contains self register functions in startup. @@ -602,12 +611,18 @@ def system_lib(): The system lib is intended to be linked and loaded during the entire life-cyle of the program. If you want dynamic loading features, use dso modules instead. + Parameters + ---------- + symbol_prefix: Optional[str] + Optional symbol prefix that can be used for search. When we lookup a symbol + symbol_prefix + name will first be searched, then the name without symbol_prefix. + Returns ------- module : runtime.Module The system-wide library module. """ - return _ffi_api.SystemLib() + return _ffi_api.SystemLib(symbol_prefix) def load_module(path, fmt=""): diff --git a/src/runtime/library_module.h b/src/runtime/library_module.h index 44dc323186f8..167e819601fa 100644 --- a/src/runtime/library_module.h +++ b/src/runtime/library_module.h @@ -101,6 +101,8 @@ ObjectPtr CreateDSOLibraryObject(std::string library_path); * \param lib The library. * \param wrapper Optional function used to wrap a TVMBackendPackedCFunc, * by default WrapPackedFunc is used. + * \param symbol_prefix Optional symbol prefix that can be used to search alternative symbols. + * * \return The corresponding loaded module. * * \note This function can create multiple linked modules diff --git a/src/runtime/system_library.cc b/src/runtime/system_library.cc index fe29146d8b7b..be9257e53fc6 100644 --- a/src/runtime/system_library.cc +++ b/src/runtime/system_library.cc @@ -32,20 +32,8 @@ namespace tvm { namespace runtime { -class SystemLibrary : public Library { +class SystemLibraryRegistry { public: - SystemLibrary() = default; - - void* GetSymbol(const char* name) final { - std::lock_guard lock(mutex_); - auto it = tbl_.find(name); - if (it != tbl_.end()) { - return it->second; - } else { - return nullptr; - } - } - void RegisterSymbol(const std::string& name, void* ptr) { std::lock_guard lock(mutex_); auto it = tbl_.find(name); @@ -56,8 +44,18 @@ class SystemLibrary : public Library { tbl_[name] = ptr; } - static const ObjectPtr& Global() { - static auto inst = make_object(); + void* GetSymbol(const char* name) { + std::lock_guard lock(mutex_); + auto it = tbl_.find(name); + if (it != tbl_.end()) { + return it->second; + } else { + return nullptr; + } + } + + static SystemLibraryRegistry* Global() { + static SystemLibraryRegistry* inst = new SystemLibraryRegistry(); return inst; } @@ -68,14 +66,36 @@ class SystemLibrary : public Library { std::unordered_map tbl_; }; -TVM_REGISTER_GLOBAL("runtime.SystemLib").set_body_typed([]() { - static auto mod = CreateModuleFromLibrary(SystemLibrary::Global()); - return mod; +class SystemLibrary : public Library { + public: + explicit SystemLibrary(const std::string& symbol_prefix) : symbol_prefix_(symbol_prefix) {} + + void* GetSymbol(const char* name) { + if (symbol_prefix_.length() != 0) { + std::string name_with_prefix = symbol_prefix_ + name; + void* symbol = reg_->GetSymbol(name_with_prefix.c_str()); + if (symbol != nullptr) return symbol; + } + return reg_->GetSymbol(name); + } + + private: + SystemLibraryRegistry* reg_ = SystemLibraryRegistry::Global(); + std::string symbol_prefix_; +}; + +TVM_REGISTER_GLOBAL("runtime.SystemLib").set_body([](TVMArgs args, TVMRetValue* rv) { + std::string symbol_prefix = ""; + if (args.size() != 0) { + symbol_prefix = args[0].operator std::string(); + } + auto mod = CreateModuleFromLibrary(make_object(symbol_prefix)); + *rv = mod; }); } // namespace runtime } // namespace tvm int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr) { - tvm::runtime::SystemLibrary::Global()->RegisterSymbol(name, ptr); + tvm::runtime::SystemLibraryRegistry::Global()->RegisterSymbol(name, ptr); return 0; } diff --git a/src/target/codegen.cc b/src/target/codegen.cc index 24dbfebe5543..bbb2c15a647f 100644 --- a/src/target/codegen.cc +++ b/src/target/codegen.cc @@ -240,8 +240,15 @@ std::string SerializeModule(const runtime::Module& mod) { } } // namespace -std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { +std::string PackImportsToC(const runtime::Module& mod, bool system_lib, + const std::string& c_symbol_prefix) { std::string bin = SerializeModule(mod); + std::string mdev_blob_name = c_symbol_prefix + runtime::symbol::tvm_dev_mblob; + + if (c_symbol_prefix.length() != 0) { + CHECK(system_lib) + << "c_symbol_prefix advanced option should be used in conjuction with system-lib"; + } // translate to C program std::ostringstream os; @@ -253,10 +260,10 @@ std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { os << "#ifdef __cplusplus\n" << "extern \"C\" {\n" << "#endif\n"; - os << "TVM_EXPORT extern const unsigned char " << runtime::symbol::tvm_dev_mblob << "[];\n"; + os << "TVM_EXPORT extern const unsigned char " << mdev_blob_name << "[];\n"; uint64_t nbytes = bin.length(); - os << "const unsigned char " << runtime::symbol::tvm_dev_mblob << "[" - << bin.length() + sizeof(nbytes) << "] = {\n "; + os << "const unsigned char " << mdev_blob_name << "[" << bin.length() + sizeof(nbytes) + << "] = {\n "; os << std::hex; size_t nunit = 80 / 4; for (size_t i = 0; i < sizeof(nbytes); ++i) { @@ -279,9 +286,9 @@ std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { os << "\n};\n"; if (system_lib) { os << "extern int TVMBackendRegisterSystemLibSymbol(const char*, void*);\n"; - os << "static int " << runtime::symbol::tvm_dev_mblob << "_reg_ = " - << "TVMBackendRegisterSystemLibSymbol(\"" << runtime::symbol::tvm_dev_mblob << "\", (void*)" - << runtime::symbol::tvm_dev_mblob << ");\n"; + os << "static int " << mdev_blob_name << "_reg_ = " + << "TVMBackendRegisterSystemLibSymbol(\"" << mdev_blob_name << "\", (void*)" + << mdev_blob_name << ");\n"; } os << "#ifdef __cplusplus\n" << "}\n" @@ -290,7 +297,13 @@ std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { } runtime::Module PackImportsToLLVM(const runtime::Module& mod, bool system_lib, - const std::string& llvm_target_string) { + const std::string& llvm_target_string, + const std::string& c_symbol_prefix) { + if (c_symbol_prefix.length() != 0) { + CHECK(system_lib) + << "c_symbol_prefix advanced option should be used in conjuction with system-lib"; + } + std::string bin = SerializeModule(mod); uint64_t nbytes = bin.length(); @@ -308,7 +321,7 @@ runtime::Module PackImportsToLLVM(const runtime::Module& mod, bool system_lib, // the codegen function. const PackedFunc* codegen_f = runtime::Registry::Get(codegen_f_name); ICHECK(codegen_f != nullptr) << "codegen.codegen_blob is not presented."; - return (*codegen_f)(blob_byte_array, system_lib, llvm_target_string); + return (*codegen_f)(blob_byte_array, system_lib, llvm_target_string, c_symbol_prefix); } TVM_REGISTER_GLOBAL("target.Build").set_body_typed(Build); diff --git a/src/target/llvm/codegen_amdgpu.cc b/src/target/llvm/codegen_amdgpu.cc index a177aa7f6828..3efe548e1c2e 100644 --- a/src/target/llvm/codegen_amdgpu.cc +++ b/src/target/llvm/codegen_amdgpu.cc @@ -260,7 +260,7 @@ runtime::Module BuildAMDGPU(IRModule mod, Target target) { #endif auto cg = std::make_unique(); - cg->Init("TVMAMDGPUModule", llvm_target.get(), false, false, false); + cg->Init("TVMAMDGPUModule", llvm_target.get(), NullOpt, false, false); cg->AddFunctionsOrdered(mod->functions.begin(), mod->functions.end(), [](auto& kv) { ICHECK(kv.second->template IsInstance()) diff --git a/src/target/llvm/codegen_blob.cc b/src/target/llvm/codegen_blob.cc index 5bfc69126ddc..b4fe93b518d8 100644 --- a/src/target/llvm/codegen_blob.cc +++ b/src/target/llvm/codegen_blob.cc @@ -62,19 +62,22 @@ namespace tvm { namespace codegen { std::unique_ptr CodeGenBlob(const std::string& data, bool system_lib, - LLVMTarget* llvm_target) { + LLVMTarget* llvm_target, + const std::string& c_symbol_prefix) { llvm::TargetMachine* tm = llvm_target->GetOrCreateTargetMachine(); const llvm::Triple& triple = tm->getTargetTriple(); llvm::LLVMContext* ctx = llvm_target->GetContext(); - std::string module_name = "devc"; + std::string module_name = c_symbol_prefix + "devc"; auto module = std::make_unique(module_name, *ctx); module->setTargetTriple(triple.str()); llvm_target->SetTargetMetadata(module.get()); module->setDataLayout(tm->createDataLayout()); auto* blob_value = llvm::ConstantDataArray::getString(*ctx, data, false); + std::string mdev_blob_name = c_symbol_prefix + runtime::symbol::tvm_dev_mblob; + auto* tvm_dev_mblob = new llvm::GlobalVariable( *module, blob_value->getType(), true, llvm::GlobalValue::ExternalLinkage, blob_value, - runtime::symbol::tvm_dev_mblob, nullptr, llvm::GlobalVariable::NotThreadLocal, 0); + mdev_blob_name, nullptr, llvm::GlobalVariable::NotThreadLocal, 0); // If large const data (>2GB) is saved to default .rodata section // then linking it to shared library will fail - relocation truncated to fit: R_X86_64_PC32. @@ -106,9 +109,9 @@ std::unique_ptr CodeGenBlob(const std::string& data, bool system_l auto int8_ptr_ty = int8_ty->getPointerTo(0); llvm::Constant* constant_zero = llvm::Constant::getNullValue(int32_ty); - auto* tvm_dev_mblob_reg = new llvm::GlobalVariable( - *module, int32_ty, false, llvm::GlobalValue::InternalLinkage, constant_zero, - std::string(runtime::symbol::tvm_dev_mblob) + "_reg_"); + auto* tvm_dev_mblob_reg = + new llvm::GlobalVariable(*module, int32_ty, false, llvm::GlobalValue::InternalLinkage, + constant_zero, mdev_blob_name + "_reg_"); auto tvm_dev_mblob_reg_alignment = #if TVM_LLVM_VERSION >= 110 module->getDataLayout().getABITypeAlign(int32_ty); @@ -121,13 +124,12 @@ std::unique_ptr CodeGenBlob(const std::string& data, bool system_l tvm_dev_mblob_reg->setAlignment(tvm_dev_mblob_reg_alignment); #endif - auto* tvm_dev_mblob_string_ty = - llvm::ArrayType::get(int8_ty, std::strlen(runtime::symbol::tvm_dev_mblob) + 1); + auto* tvm_dev_mblob_string_ty = llvm::ArrayType::get(int8_ty, mdev_blob_name.length() + 1); auto* tvm_dev_mblob_string_value = - llvm::ConstantDataArray::getString(*ctx, runtime::symbol::tvm_dev_mblob, true); + llvm::ConstantDataArray::getString(*ctx, mdev_blob_name, true); auto* tvm_dev_mblob_string = new llvm::GlobalVariable( *module, tvm_dev_mblob_string_ty, true, llvm::GlobalValue::PrivateLinkage, - tvm_dev_mblob_string_value, std::string(runtime::symbol::tvm_dev_mblob) + ".str"); + tvm_dev_mblob_string_value, mdev_blob_name + ".str"); #if TVM_LLVM_VERSION >= 100 tvm_dev_mblob_string->setAlignment(llvm::Align(1)); #else diff --git a/src/target/llvm/codegen_blob.h b/src/target/llvm/codegen_blob.h index a06c043c07b1..e3d61ff68409 100644 --- a/src/target/llvm/codegen_blob.h +++ b/src/target/llvm/codegen_blob.h @@ -44,11 +44,13 @@ class LLVMTarget; * \param data Blob data * \param system_lib Whether expose as system library. * \param target_triple LLVM target triple + * \param c_symbol prefix The C symbol prefix of the blob. * * \return LLVM module and LLVM context */ std::unique_ptr CodeGenBlob(const std::string& data, bool system_lib, - LLVMTarget* llvm_target); + LLVMTarget* llvm_target, + const std::string& c_symbol_prefix = ""); } // namespace codegen } // namespace tvm diff --git a/src/target/llvm/codegen_cpu.cc b/src/target/llvm/codegen_cpu.cc index 59575c370fe0..dbcdb4a3af87 100644 --- a/src/target/llvm/codegen_cpu.cc +++ b/src/target/llvm/codegen_cpu.cc @@ -71,9 +71,11 @@ namespace codegen { CodeGenCPU::CodeGenCPU() = default; CodeGenCPU::~CodeGenCPU() = default; -void CodeGenCPU::Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime) { - CodeGenLLVM::Init(module_name, llvm_target, system_lib, dynamic_lookup, target_c_runtime); +void CodeGenCPU::Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, + bool target_c_runtime) { + CodeGenLLVM::Init(module_name, llvm_target, system_lib_prefix, dynamic_lookup, target_c_runtime); + system_lib_prefix_ = system_lib_prefix; dbg_info_ = CreateDebugInfo(module_.get()); static_assert(sizeof(TVMValue) == sizeof(double), "invariant"); func_handle_map_.clear(); @@ -153,7 +155,7 @@ void CodeGenCPU::Init(const std::string& module_name, LLVMTarget* llvm_target, b ftype_tvm_static_init_callback_->getPointerTo(), t_void_p_, t_int_}, false); // initialize TVM runtime API - if (system_lib && !target_c_runtime) { + if (system_lib_prefix_.defined() && !target_c_runtime) { // We will need this in environment for backward registration. // Defined in include/tvm/runtime/c_backend_api.h: // int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr); @@ -163,7 +165,7 @@ void CodeGenCPU::Init(const std::string& module_name, LLVMTarget* llvm_target, b } else { f_tvm_register_system_symbol_ = nullptr; } - if (dynamic_lookup || system_lib) { + if (dynamic_lookup || system_lib_prefix_.defined()) { f_tvm_func_call_ = llvm::Function::Create(ftype_tvm_func_call_, llvm::Function::ExternalLinkage, "TVMFuncCall", module_.get()); f_tvm_get_func_from_env_ = @@ -180,7 +182,6 @@ void CodeGenCPU::Init(const std::string& module_name, LLVMTarget* llvm_target, b "TVMBackendParallelBarrier", module_.get()); } target_c_runtime_ = target_c_runtime; - is_system_lib_ = system_lib; InitGlobalContext(dynamic_lookup); } @@ -527,12 +528,12 @@ llvm::Value* CodeGenCPU::GetContextPtr(llvm::GlobalVariable* gv) { } void CodeGenCPU::InitGlobalContext(bool dynamic_lookup) { + std::string ctx_symbol = system_lib_prefix_.value_or("") + tvm::runtime::symbol::tvm_module_ctx; // Module context - gv_mod_ctx_ = InitContextPtr(t_void_p_, tvm::runtime::symbol::tvm_module_ctx); + gv_mod_ctx_ = InitContextPtr(t_void_p_, ctx_symbol); // Register back the locations. if (f_tvm_register_system_symbol_ != nullptr && !target_c_runtime_) { - export_system_symbols_.emplace_back( - std::make_pair(tvm::runtime::symbol::tvm_module_ctx, gv_mod_ctx_)); + export_system_symbols_.emplace_back(std::make_pair(ctx_symbol, gv_mod_ctx_)); } else { if (!dynamic_lookup) { gv_tvm_func_call_ = InitContextPtr(ftype_tvm_func_call_->getPointerTo(), "__TVMFuncCall"); @@ -1344,7 +1345,8 @@ void CodeGenCPU::DefineMetadata(runtime::metadata::Metadata metadata) { } void CodeGenCPU::DefineFunctionRegistry(Array func_names) { - ICHECK(is_system_lib_) << "Loading of --system-lib modules is yet to be defined for C runtime"; + ICHECK(system_lib_prefix_.defined()) + << "Loading of --system-lib modules is yet to be defined for C runtime"; Array symbols; std::vector funcs; for (auto sym : func_names) { diff --git a/src/target/llvm/codegen_cpu.h b/src/target/llvm/codegen_cpu.h index afbd49e14348..3cc1bbeb419e 100644 --- a/src/target/llvm/codegen_cpu.h +++ b/src/target/llvm/codegen_cpu.h @@ -64,8 +64,9 @@ class CodeGenCPU : public CodeGenLLVM { CodeGenCPU(); virtual ~CodeGenCPU(); - void Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime) override; + void Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, + bool target_c_runtime) override; void AddFunction(const PrimFunc& f) override; void AddMainFunction(const std::string& entry_func_name) override; std::unique_ptr Finish() override; @@ -191,7 +192,9 @@ class CodeGenCPU : public CodeGenLLVM { // internal debug information, to be populated by std::unique_ptr dbg_info_; bool target_c_runtime_; - bool is_system_lib_; + // The system lib prefix if it is not nullopt, then we should do + // system lib registration with the given prefix. The prefix can be "" + Optional system_lib_prefix_; // Get the DWARF type corresponding to the LLVM type |ty|. The current API in practice only // generates |int32|, and |int8*|. diff --git a/src/target/llvm/codegen_hexagon.cc b/src/target/llvm/codegen_hexagon.cc index 2adb95f659b9..a2f13e98b1e5 100644 --- a/src/target/llvm/codegen_hexagon.cc +++ b/src/target/llvm/codegen_hexagon.cc @@ -69,8 +69,9 @@ namespace codegen { // Hexagon code generation class CodeGenHexagon final : public CodeGenCPU { public: - void Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime) override; + void Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, + bool target_c_runtime) override; void InitTarget() final; using CodeGenCPU::VisitStmt_; @@ -114,9 +115,10 @@ class CodeGenHexagon final : public CodeGenCPU { "tvm_vect_qhmath_hvx_ceil_ahf", "tvm_vect_qhmath_hvx_pow_ahf"}; }; -void CodeGenHexagon::Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime) { - CodeGenCPU::Init(module_name, llvm_target, system_lib, dynamic_lookup, target_c_runtime); +void CodeGenHexagon::Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, + bool target_c_runtime) { + CodeGenCPU::Init(module_name, llvm_target, system_lib_prefix, dynamic_lookup, target_c_runtime); } void CodeGenHexagon::InitTarget() { @@ -563,7 +565,7 @@ runtime::Module BuildHexagon(IRModule mod, Target target) { funcs.emplace_back(f); } - cg->Init("TVMHexagonModule", llvm_target.get(), false, false, false); + cg->Init("TVMHexagonModule", llvm_target.get(), NullOpt, false, false); cg->AddFunctionsOrdered(funcs.begin(), funcs.end()); if (entry_func.length() != 0) { cg->AddMainFunction(entry_func); diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 2a8c3226f304..01e25d536118 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -136,8 +136,9 @@ std::unique_ptr CodeGenLLVM::Create(LLVMTarget* llvm_target) { } } -void CodeGenLLVM::Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime) { +void CodeGenLLVM::Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, + bool target_c_runtime) { llvm_target_ = llvm_target; llvm::LLVMContext* ctx = llvm_target_->GetContext(); builder_.reset(new IRBuilder(*ctx)); diff --git a/src/target/llvm/codegen_llvm.h b/src/target/llvm/codegen_llvm.h index 0d5650c473cf..ca4c916f84d8 100644 --- a/src/target/llvm/codegen_llvm.h +++ b/src/target/llvm/codegen_llvm.h @@ -116,14 +116,15 @@ class CodeGenLLVM : public ExprFunctor, * \param module_name The name of the module. * \param tm Target machine model * \param ctx The context. - * \param system_lib Whether to insert system library registration. + * \param system_lib_prefix If the value is not NullOpt, insert system lib registration. + * The value corresponds to the prefix of the system lib symbols. * \param dynamic_lookup Whether dynamically lookup runtime function * or use the runtime function table passed by caller. * \param target_c_runtime If true, generate a module to be executed by the C runtime. In practice * this option influences whether global ctors are used. */ - virtual void Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, - bool dynamic_lookup, bool target_c_runtime); + virtual void Init(const std::string& module_name, LLVMTarget* llvm_target, + Optional system_lib_prefix, bool dynamic_lookup, bool target_c_runtime); /*! * \brief Turn on fast math flags for floating point operations. diff --git a/src/target/llvm/codegen_nvptx.cc b/src/target/llvm/codegen_nvptx.cc index 46816eb20cc5..18f60922910b 100644 --- a/src/target/llvm/codegen_nvptx.cc +++ b/src/target/llvm/codegen_nvptx.cc @@ -309,7 +309,7 @@ runtime::Module BuildNVPTX(IRModule mod, Target target) { int compute_ver = GetCUDAComputeVersion(target); auto cg = std::make_unique(); - cg->Init("TVMPTXModule", llvm_target.get(), false, false, false); + cg->Init("TVMPTXModule", llvm_target.get(), NullOpt, false, false); cg->AddFunctionsOrdered(mod->functions.begin(), mod->functions.end(), [](auto& kv) { ICHECK(kv.second->template IsInstance()) diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index 2173cad4a719..b6a0da84752a 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -142,6 +142,15 @@ PackedFunc LLVMModuleNode::GetFunction(const std::string& name, if (name == "__tvm_is_system_module") { bool flag = (module_->getFunction("__tvm_module_startup") != nullptr); return PackedFunc([flag](TVMArgs args, TVMRetValue* rv) { *rv = flag; }); + } else if (name == "__tvm_get_system_lib_prefix") { + return PackedFunc([this](TVMArgs args, TVMRetValue* rv) { + auto* md = module_->getModuleFlag("tvm_system_lib_prefix"); + if (md != nullptr) { + *rv = llvm::cast(md)->getString().str(); + } else { + *rv = nullptr; + } + }); } else if (name == "get_func_names") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->function_names_; }); @@ -290,7 +299,12 @@ void LLVMModuleNode::Init(const IRModule& mod, const Target& target) { std::string entry_func; relay::Runtime runtime = mod->GetAttr(tvm::attr::kRuntime).value_or(relay::Runtime::Create("cpp")); - bool system_lib = runtime->GetAttr("system-lib").value_or(Bool(false)); + + Optional system_lib_prefix = mod->GetAttr(tvm::attr::kSystemLibPrefix); + if (!system_lib_prefix && runtime->GetAttr("system-lib").value_or(Bool(false))) { + system_lib_prefix = ""; + } + bool target_c_runtime = runtime->name == "crt"; for (auto kv : mod->functions) { @@ -312,7 +326,8 @@ void LLVMModuleNode::Init(const IRModule& mod, const Target& target) { // ICHECK(funcs.size() > 0); // TODO(tqchen): remove the entry function behavior as it does not // makes sense when we start to use multiple modules. - cg->Init("TVMMod", llvm_target.get(), system_lib, system_lib, target_c_runtime); + cg->Init("TVMMod", llvm_target.get(), system_lib_prefix, system_lib_prefix.defined(), + target_c_runtime); cg->SetFastMathFlags(llvm_target->GetFastMathFlags()); cg->AddFunctionsOrdered(funcs.begin(), funcs.end()); @@ -326,6 +341,12 @@ void LLVMModuleNode::Init(const IRModule& mod, const Target& target) { module_->addModuleFlag(llvm::Module::Override, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); + if (system_lib_prefix) { + std::string str_val = system_lib_prefix.value(); + module_->addModuleFlag(llvm::Module::Warning, "tvm_system_lib_prefix", + llvm::MDString::get(*(llvm_target->GetContext()), str_val)); + } + if (tm->getTargetTriple().isOSDarwin()) { module_->addModuleFlag(llvm::Module::Override, "Dwarf Version", 2); } @@ -479,12 +500,13 @@ TVM_REGISTER_GLOBAL("codegen.llvm_target_enabled") }); TVM_REGISTER_GLOBAL("codegen.codegen_blob") - .set_body_typed([](std::string data, bool system_lib, - std::string llvm_target_string) -> runtime::Module { + .set_body_typed([](std::string data, bool system_lib, std::string llvm_target_string, + std::string c_symbol_prefix) -> runtime::Module { auto n = make_object(); auto llvm_instance = std::make_unique(); With llvm_target(*llvm_instance, llvm_target_string); - std::unique_ptr blob = CodeGenBlob(data, system_lib, llvm_target.get()); + std::unique_ptr blob = + CodeGenBlob(data, system_lib, llvm_target.get(), c_symbol_prefix); n->Init(std::move(blob), std::move(llvm_instance)); return runtime::Module(n); }); @@ -493,10 +515,15 @@ runtime::Module CreateLLVMCppMetadataModule(runtime::metadata::Metadata metadata tvm::relay::Runtime runtime) { auto llvm_instance = std::make_unique(); With llvm_target(*llvm_instance, target); - bool system_lib = runtime->GetAttr("system-lib").value_or(Bool(false)); + + Optional system_lib_prefix = NullOpt; + if (runtime->GetAttr("system-lib").value_or(Bool(false))) { + system_lib_prefix = ""; + } + auto cg = std::make_unique(); - cg->Init("TVMMetadataMod", llvm_target.get(), system_lib, system_lib, + cg->Init("TVMMetadataMod", llvm_target.get(), system_lib_prefix, system_lib_prefix.defined(), /*target_c_runtime=*/false); cg->DefineMetadata(metadata); @@ -531,13 +558,19 @@ runtime::Module CreateLLVMCrtMetadataModule(const Array& module auto llvm_instance = std::make_unique(); With llvm_target(*llvm_instance, target); - bool system_lib = runtime->GetAttr("system-lib").value_or(Bool(false)); + + Optional system_lib_prefix = NullOpt; + if (runtime->GetAttr("system-lib").value_or(Bool(false))) { + system_lib_prefix = ""; + } + bool target_c_runtime = runtime->name == "crt"; - ICHECK(system_lib && target_c_runtime) + ICHECK(system_lib_prefix.defined() && target_c_runtime) << "For LLVM C-runtime metadata module, must include --system-lib and --runtime=c; " << "got target: " << target->str(); auto cg = std::make_unique(); - cg->Init("TVMMetadataMod", llvm_target.operator->(), system_lib, system_lib, target_c_runtime); + cg->Init("TVMMetadataMod", llvm_target.operator->(), system_lib_prefix, + system_lib_prefix.defined(), target_c_runtime); cg->DefineFunctionRegistry(func_names); auto mod = cg->Finish(); diff --git a/tests/python/unittest/test_runtime_module_load.py b/tests/python/unittest/test_runtime_module_load.py index 9d067630879a..31e0faf0d4e3 100644 --- a/tests/python/unittest/test_runtime_module_load.py +++ b/tests/python/unittest/test_runtime_module_load.py @@ -16,8 +16,7 @@ # under the License. import tvm from tvm import te -from tvm.contrib import cc, utils -import ctypes +from tvm.contrib import cc, utils, popen_pool import sys import numpy as np import subprocess @@ -128,15 +127,24 @@ def check_device(device): # test cross compiler function f.export_library(path_dso, cc.cross_compiler("g++")) - f1 = tvm.runtime.load_module(path_dso) - a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev) - b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev) - f1(a, b) - np.testing.assert_equal(b.numpy(), a.numpy() + 1) - if sys.platform != "win32": - f2 = tvm.runtime.system_lib() - f2[name](a, b) + def popen_check(): + import tvm + import sys + + f1 = tvm.runtime.load_module(path_dso) + a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev) + b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev) + f1(a, b) np.testing.assert_equal(b.numpy(), a.numpy() + 1) + if sys.platform != "win32": + f2 = tvm.runtime.system_lib() + f2[name](a, b) + np.testing.assert_equal(b.numpy(), a.numpy() + 1) + + # system lib should be loaded in different process + worker = popen_pool.PopenWorker() + worker.send(popen_check) + worker.recv() def check_stackvm(device): dev = tvm.device(device, 0) @@ -208,16 +216,26 @@ def check_system_lib(): fadd1.save(path1) fadd2.save(path2) cc.create_shared(path_dso, [path1, path2]) - # Load dll, will trigger system library registration - ctypes.CDLL(path_dso) - # Load the system wide library - mm = tvm.runtime.system_lib() - a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), dev) - b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), dev) - mm["myadd1"](a, b) - np.testing.assert_equal(b.numpy(), a.numpy() + 1) - mm["myadd2"](a, b) - np.testing.assert_equal(b.numpy(), a.numpy() + 1) + + def popen_check(): + import tvm.runtime + import ctypes + + # Load dll, will trigger system library registration + ctypes.CDLL(path_dso) + # Load the system wide library + mm = tvm.runtime.system_lib() + a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), dev) + b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), dev) + mm["myadd1"](a, b) + np.testing.assert_equal(b.numpy(), a.numpy() + 1) + mm["myadd2"](a, b) + np.testing.assert_equal(b.numpy(), a.numpy() + 1) + + # system lib should be loaded in different process + worker = popen_pool.PopenWorker() + worker.send(popen_check) + worker.recv() if sys.platform != "win32": check_system_lib() diff --git a/tests/python/unittest/test_runtime_rpc.py b/tests/python/unittest/test_runtime_rpc.py index 8d0567cecce9..97016684a6b8 100644 --- a/tests/python/unittest/test_runtime_rpc.py +++ b/tests/python/unittest/test_runtime_rpc.py @@ -179,7 +179,7 @@ def check_remote(): @tvm.testing.skip_if_32bit(reason="skipping test for i386.") @tvm.testing.requires_rpc def test_rpc_echo(): - def check(remote): + def check(remote, local_session): fecho = remote.get_function("testing.echo") assert fecho(1, 2, 3) == 1 assert fecho(100, 2, 3) == 100 @@ -191,15 +191,19 @@ def check(remote): raise_err() remote.cpu().sync() - with pytest.raises(AttributeError): - f3 = remote.system_lib()["notexist"] + # tests around system lib are not threadsafe by design + # and do not work well with multithread pytest + # skip local session as they are being tested elsewhere + if not local_session: + with pytest.raises(AttributeError): + f3 = remote.system_lib()["notexist"] temp = rpc.server._server_env([]) server = rpc.Server() client = rpc.connect("127.0.0.1", server.port) - check(rpc.LocalSession()) + check(rpc.LocalSession(), True) - check(client) + check(client, False) def check_minrpc(): if tvm.get_global_func("rpc.CreatePipeClient", allow_missing=True) is None: @@ -208,7 +212,7 @@ def check_minrpc(): temp = utils.tempdir() minrpc_exec = temp.relpath("minrpc") tvm.rpc.with_minrpc(cc.create_executable)(minrpc_exec, []) - check(rpc.PopenSession(minrpc_exec)) + check(rpc.PopenSession(minrpc_exec), False) # minrpc on the remote server = rpc.Server() client = rpc.connect( @@ -216,7 +220,7 @@ def check_minrpc(): server.port, session_constructor_args=["rpc.PopenSession", open(minrpc_exec, "rb").read()], ) - check(client) + check(client, False) check_minrpc() diff --git a/tests/python/unittest/test_target_codegen_blob.py b/tests/python/unittest/test_target_codegen_blob.py index 62dcf924b43b..570de4b248b6 100644 --- a/tests/python/unittest/test_target_codegen_blob.py +++ b/tests/python/unittest/test_target_codegen_blob.py @@ -15,14 +15,14 @@ # specific language governing permissions and limitations # under the License. +import ctypes import numpy as np from tvm import relay -from tvm.relay import testing -from tvm.contrib import graph_executor +import tvm.relay.testing +from tvm.contrib import graph_executor, cc, utils, popen_pool import tvm -from tvm import te -import ctypes import tvm.testing +from tvm.script import ir as I, tir as T @tvm.testing.uses_gpu @@ -49,8 +49,6 @@ def verify(data): with tvm.transform.PassContext(opt_level=3): synthetic_gpu_lib = relay.build_module.build(synthetic_mod, "cuda", params=synthetic_params) - from tvm.contrib import utils - temp = utils.tempdir() path_lib = temp.relpath("deploy_lib.so") synthetic_gpu_lib.export_library(path_lib) @@ -67,34 +65,71 @@ def verify(data): @tvm.testing.uses_gpu -def test_cuda_lib(): +def test_cuda_multi_lib(): + # test combining two system lib together + # each contains a fatbin component in cuda dev = tvm.cuda(0) for device in ["llvm", "cuda"]: if not tvm.testing.device_enabled(device): print("skip because %s is not enabled..." % device) return - nn = 12 - n = tvm.runtime.convert(nn) - A = te.placeholder((n,), name="A") - B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B") - s = te.create_schedule(B.op) - bx, tx = s[B].split(B.op.axis[0], factor=4) - s[B].bind(bx, te.thread_axis("blockIdx.x")) - s[B].bind(tx, te.thread_axis("threadIdx.x")) - from tvm.contrib import utils + @tvm.script.ir_module + class ModA: + I.module_attrs({"system_lib_prefix": "modA_"}) + + @T.prim_func + def my_inplace_update(x: T.Buffer((12), "float32")) -> None: + T.func_attr({"global_symbol": "modA_my_inplace_update"}) + for bx in T.thread_binding(T.int64(1), thread="blockIdx.x"): + for tx in T.thread_binding(T.int64(12), thread="threadIdx.x"): + x[tx] = x[tx] + 1 + + @tvm.script.ir_module + class ModB: + I.module_attrs({"system_lib_prefix": "modB_"}) + + @T.prim_func + def my_inplace_update(x: T.Buffer((12), "float32")) -> None: + T.func_attr({"global_symbol": "modB_my_inplace_update"}) + for bx in T.thread_binding(T.int64(1), thread="blockIdx.x"): + for tx in T.thread_binding(T.int64(12), thread="threadIdx.x"): + x[tx] = x[tx] + 2 temp = utils.tempdir() - fn_add = tvm.build(s, [A, B], target="cuda --host=llvm", name="add") - path_lib = temp.relpath("deploy_lib.so") - fn_add.export_library(path_lib) - m = tvm.runtime.load_module(path_lib) - a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), dev) - b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), dev) - m["add"](a, b) - np.testing.assert_equal(b.numpy(), a.numpy() + 1) + target = tvm.target.Target("cuda", host="llvm") + libA = tvm.build(ModA, target=target) + libB = tvm.build(ModB, target=target) + + pathA = temp.relpath("libA.a") + pathB = temp.relpath("libB.a") + path_dso = temp.relpath("mylib.so") + libA.export_library(pathA, cc.create_staticlib) + libB.export_library(pathB, cc.create_staticlib) + # package two static libs together + cc.create_shared(path_dso, ["-Wl,--whole-archive", pathA, pathB, "-Wl,--no-whole-archive"]) + + def popen_check(): + # Load dll, will trigger system library registration + ctypes.CDLL(path_dso) + # Load the system wide library + dev = tvm.cuda() + a_np = np.random.uniform(size=12).astype("float32") + a_nd = tvm.nd.array(a_np, dev) + b_nd = tvm.nd.array(a_np, dev) + syslibA = tvm.runtime.system_lib("modA_") + syslibB = tvm.runtime.system_lib("modB_") + syslibA["my_inplace_update"](a_nd) + syslibB["my_inplace_update"](b_nd) + np.testing.assert_equal(a_nd.numpy(), a_np + 1) + np.testing.assert_equal(b_nd.numpy(), a_np + 2) + + # system lib should be loaded in different process + worker = popen_pool.PopenWorker() + worker.send(popen_check) + worker.recv() if __name__ == "__main__": test_synthetic() - test_cuda_lib() + test_cuda_multilib()