From c12fb2b8ec40610db470803051736e2137ffd3cd Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Wed, 8 Jun 2022 14:00:18 +0800 Subject: [PATCH 1/7] [aot] [llvm] Implemented FieldCacheData and refactored initialize_llvm_runtime_snodes() --- taichi/llvm/llvm_offline_cache.h | 31 +++++- taichi/llvm/llvm_program.cpp | 177 ++++++++++++++++++------------- taichi/llvm/llvm_program.h | 6 +- 3 files changed, 139 insertions(+), 75 deletions(-) diff --git a/taichi/llvm/llvm_offline_cache.h b/taichi/llvm/llvm_offline_cache.h index c82837e42521e..fe9eca8f8ba96 100644 --- a/taichi/llvm/llvm_offline_cache.h +++ b/taichi/llvm/llvm_offline_cache.h @@ -42,7 +42,36 @@ struct LlvmOfflineCache { TI_IO_DEF(kernel_key, args, offloaded_task_list); }; - std::unordered_map kernels; + struct FieldCacheData { + struct SNodeCacheData { + int id; + int type; + size_t cell_size_bytes; + size_t chunk_size; + + TI_IO_DEF(id, type, cell_size_bytes, chunk_size); + }; + + int tree_id; + size_t root_size; + std::vector snode_metas; + + TI_IO_DEF(tree_id, root_size, snode_metas); + + // TODO(zhanlue) + // Serialize/Deserialize the llvm::Module from StructCompiler + // At runtime, make sure loaded Field-Modules and Kernel-Modules are linked + // altogether. + }; + + // TODO(zhanlue): we need a better identifier for each FieldCacheData + // (SNodeTree) Given that snode_tree_id is not continuous, it is ridiculous to + // ask the users to remember each of the snode_tree_ids + // ** Find a way to name each SNodeTree ** + std::unordered_map fields; // key = snode_tree_id + + std::unordered_map + kernels; // key = kernel_name TI_IO_DEF(kernels); }; diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index 65bc1a75e12e1..e922a9ebe8026 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -158,95 +158,105 @@ LlvmProgramImpl::clone_struct_compiler_initial_context( return tlctx->clone_runtime_module(); } -void LlvmProgramImpl::initialize_llvm_runtime_snodes(const SNodeTree *tree, - StructCompiler *scomp, - uint64 *result_buffer) { - TaichiLLVMContext *tlctx = nullptr; - if (config->arch == Arch::cuda) { +void LlvmProgramImpl::initialize_llvm_runtime_snodes( + const LlvmOfflineCache::FieldCacheData &field_cache_data, + uint64 *result_buffer); +TaichiLLVMContext *tlctx = nullptr; +if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - tlctx = llvm_context_device_.get(); + tlctx = llvm_context_device_.get(); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif - } else { - tlctx = llvm_context_host_.get(); - } +} else { + tlctx = llvm_context_host_.get(); +} - auto *const runtime_jit = tlctx->runtime_jit_module; - // By the time this creator is called, "this" is already destroyed. - // Therefore it is necessary to capture members by values. - const auto snodes = scomp->snodes; - const int root_id = tree->root()->id; - - TI_TRACE("Allocating data structure of size {} bytes", scomp->root_size); - std::size_t rounded_size = - taichi::iroundup(scomp->root_size, taichi_page_size); - - Ptr root_buffer = snode_tree_buffer_manager_->allocate( - runtime_jit, llvm_runtime_, rounded_size, taichi_page_size, tree->id(), - result_buffer); - if (config->arch == Arch::cuda) { +auto *const runtime_jit = tlctx -> runtime_jit_module; +// By the time this creator is called, "this" is already destroyed. +// Therefore it is necessary to capture members by values. +size_t root_size = field_cache_data.root_size; +const auto snode_metas = field_cache_data.snode_metas; +const int root_id = field_cache_data.tree_id; + +TI_TRACE("Allocating data structure of size {} bytes", root_size); +std::size_t rounded_size = taichi::iroundup(root_size, taichi_page_size); + +Ptr root_buffer = snode_tree_buffer_manager_->allocate(runtime_jit, + llvm_runtime_, + rounded_size, + taichi_page_size, + root_id, + result_buffer); +if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - CUDADriver::get_instance().memset(root_buffer, 0, rounded_size); + CUDADriver::get_instance().memset(root_buffer, 0, rounded_size); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif - } else { - std::memset(root_buffer, 0, rounded_size); - } +} else { + std::memset(root_buffer, 0, rounded_size); +} - DeviceAllocation alloc{kDeviceNullAllocation}; +DeviceAllocation alloc{kDeviceNullAllocation}; - if (config->arch == Arch::cuda) { +if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - alloc = cuda_device()->import_memory(root_buffer, rounded_size); + alloc = cuda_device()->import_memory(root_buffer, rounded_size); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif - } else { - alloc = cpu_device()->import_memory(root_buffer, rounded_size); - } +} else { + alloc = cpu_device()->import_memory(root_buffer, rounded_size); +} - snode_tree_allocs_[tree->id()] = alloc; +snode_tree_allocs_[tree->id()] = alloc; - bool all_dense = config->demote_dense_struct_fors; - for (int i = 0; i < (int)snodes.size(); i++) { - if (snodes[i]->type != SNodeType::dense && - snodes[i]->type != SNodeType::place && - snodes[i]->type != SNodeType::root) { - all_dense = false; - break; - } +bool all_dense = config->demote_dense_struct_fors; +for (size_t i = 0; i < snode_metas.size(); i++) { + if (snode_metas[i]->type != SNodeType::dense && + snode_metas[i]->type != SNodeType::place && + snode_metas[i]->type != SNodeType::root) { + all_dense = false; + break; } +} - runtime_jit->call( - "runtime_initialize_snodes", llvm_runtime_, scomp->root_size, root_id, - (int)snodes.size(), tree->id(), rounded_size, root_buffer, all_dense); - - for (int i = 0; i < (int)snodes.size(); i++) { - if (is_gc_able(snodes[i]->type)) { - const auto snode_id = snodes[i]->id; - std::size_t node_size; - auto element_size = snodes[i]->cell_size_bytes; - if (snodes[i]->type == SNodeType::pointer) { - // pointer. Allocators are for single elements - node_size = element_size; - } else { - // dynamic. Allocators are for the chunks - node_size = sizeof(void *) + element_size * snodes[i]->chunk_size; - } - TI_TRACE("Initializing allocator for snode {} (node size {})", snode_id, - node_size); - auto rt = llvm_runtime_; - runtime_jit->call( - "runtime_NodeAllocator_initialize", rt, snode_id, node_size); - TI_TRACE("Allocating ambient element for snode {} (node size {})", - snode_id, node_size); - runtime_jit->call("runtime_allocate_ambient", rt, snode_id, - node_size); +runtime_jit->call( + "runtime_initialize_snodes", + llvm_runtime_, + root_size, + root_id, + (int)snode_metas.size(), + root_id, + rounded_size, + root_buffer, + all_dense); + +for (size_t i = 0; i < snode_metas.size(); i++) { + if (is_gc_able(snode_metas[i]->type)) { + const auto snode_id = snode_metas[i].id; + std::size_t node_size; + auto element_size = snode_metas[i].cell_size_bytes; + if (snode_metas[i].type == SNodeType::pointer) { + // pointer. Allocators are for single elements + node_size = element_size; + } else { + // dynamic. Allocators are for the chunks + node_size = sizeof(void *) + element_size * snode_metas[i].chunk_size; } + TI_TRACE("Initializing allocator for snode {} (node size {})", snode_id, + node_size); + auto rt = llvm_runtime_; + runtime_jit->call( + "runtime_NodeAllocator_initialize", rt, snode_id, node_size); + TI_TRACE("Allocating ambient element for snode {} (node size {})", snode_id, + node_size); + runtime_jit->call("runtime_allocate_ambient", rt, snode_id, + node_size); } } +} std::unique_ptr LlvmProgramImpl::compile_snode_tree_types_impl( SNodeTree *tree) { @@ -275,10 +285,35 @@ void LlvmProgramImpl::compile_snode_tree_types(SNodeTree *tree) { compile_snode_tree_types_impl(tree); } +static LlvmOfflineCache::FieldCacheData construct_filed_cache_data( + const SNodeTree &tree, + const StructCompiler &struct_compiler) { + TI_ASSERT(tree.id == tree.root()->id); + + LlvmOfflineCache::FieldCacheData ret; + ret.tree_id = tree.id; + ret.root_size = struct_compiler.root_size; + + const auto &snodes = struct_compiler.snodes; + for (size_t i = 0; i < snodes.size(); i++) { + LlvmOfflineCache::FieldCacheData::SNodeCacheData snode_cache_data; + snode_cache_data.id = snodes[i]->id; + snode_cache_data.type = snodes[i]->type; + snode_cache_data.cell_size_bytes = snodes[i]->cell_size_bytes; + snode_cache_data.chunk_size = snodes[i]->chunk_size; + + ret.snode_metas.emplace_back(std::move(snode_cache_data)); + } + + return ret; +} + void LlvmProgramImpl::materialize_snode_tree(SNodeTree *tree, uint64 *result_buffer) { auto struct_compiler = compile_snode_tree_types_impl(tree); - initialize_llvm_runtime_snodes(tree, struct_compiler.get(), result_buffer); + + auto field_cache_data = construct_filed_cache_data(*tree, *struct_compiler); + initialize_llvm_runtime_snodes(field_cache_data, result_buffer); } uint64 LlvmProgramImpl::fetch_result_uint64(int i, uint64 *result_buffer) { diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h index c9029bbcd85f0..69378ee660bf1 100644 --- a/taichi/llvm/llvm_program.h +++ b/taichi/llvm/llvm_program.h @@ -132,9 +132,9 @@ class LlvmProgramImpl : public ProgramImpl { /** * Initializes the SNodes for LLVM based backends. */ - void initialize_llvm_runtime_snodes(const SNodeTree *tree, - StructCompiler *scomp, - uint64 *result_buffer); + void initialize_llvm_runtime_snodes( + const LlvmOfflineCache::FieldCacheData &field_cache_data, + uint64 *result_buffer); uint64 fetch_result_uint64(int i, uint64 *result_buffer); From e3f1ab8ac70798f8b14e5e4750c9ea43c3544b74 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Wed, 8 Jun 2022 14:15:47 +0800 Subject: [PATCH 2/7] Addressed compilation erros --- taichi/llvm/llvm_offline_cache.h | 3 +- taichi/llvm/llvm_program.cpp | 152 +++++++++++++++---------------- 2 files changed, 73 insertions(+), 82 deletions(-) diff --git a/taichi/llvm/llvm_offline_cache.h b/taichi/llvm/llvm_offline_cache.h index fe9eca8f8ba96..91386c2bffb31 100644 --- a/taichi/llvm/llvm_offline_cache.h +++ b/taichi/llvm/llvm_offline_cache.h @@ -45,7 +45,7 @@ struct LlvmOfflineCache { struct FieldCacheData { struct SNodeCacheData { int id; - int type; + SNodeType type; size_t cell_size_bytes; size_t chunk_size; @@ -53,6 +53,7 @@ struct LlvmOfflineCache { }; int tree_id; + int root_id; size_t root_size; std::vector snode_metas; diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index e922a9ebe8026..eea60dad165f7 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -160,103 +160,94 @@ LlvmProgramImpl::clone_struct_compiler_initial_context( void LlvmProgramImpl::initialize_llvm_runtime_snodes( const LlvmOfflineCache::FieldCacheData &field_cache_data, - uint64 *result_buffer); -TaichiLLVMContext *tlctx = nullptr; -if (config->arch == Arch::cuda) { + uint64 *result_buffer) { + TaichiLLVMContext *tlctx = nullptr; + if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - tlctx = llvm_context_device_.get(); + tlctx = llvm_context_device_.get(); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif -} else { - tlctx = llvm_context_host_.get(); -} + } else { + tlctx = llvm_context_host_.get(); + } -auto *const runtime_jit = tlctx -> runtime_jit_module; -// By the time this creator is called, "this" is already destroyed. -// Therefore it is necessary to capture members by values. -size_t root_size = field_cache_data.root_size; -const auto snode_metas = field_cache_data.snode_metas; -const int root_id = field_cache_data.tree_id; - -TI_TRACE("Allocating data structure of size {} bytes", root_size); -std::size_t rounded_size = taichi::iroundup(root_size, taichi_page_size); - -Ptr root_buffer = snode_tree_buffer_manager_->allocate(runtime_jit, - llvm_runtime_, - rounded_size, - taichi_page_size, - root_id, - result_buffer); -if (config->arch == Arch::cuda) { + auto *const runtime_jit = tlctx->runtime_jit_module; + // By the time this creator is called, "this" is already destroyed. + // Therefore it is necessary to capture members by values. + size_t root_size = field_cache_data.root_size; + const auto snode_metas = field_cache_data.snode_metas; + const int tree_id = field_cache_data.tree_id; + const int root_id = field_cache_data.root_id; + + TI_TRACE("Allocating data structure of size {} bytes", root_size); + std::size_t rounded_size = taichi::iroundup(root_size, taichi_page_size); + + Ptr root_buffer = snode_tree_buffer_manager_->allocate( + runtime_jit, llvm_runtime_, rounded_size, taichi_page_size, tree_id, + result_buffer); + if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - CUDADriver::get_instance().memset(root_buffer, 0, rounded_size); + CUDADriver::get_instance().memset(root_buffer, 0, rounded_size); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif -} else { - std::memset(root_buffer, 0, rounded_size); -} + } else { + std::memset(root_buffer, 0, rounded_size); + } -DeviceAllocation alloc{kDeviceNullAllocation}; + DeviceAllocation alloc{kDeviceNullAllocation}; -if (config->arch == Arch::cuda) { + if (config->arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - alloc = cuda_device()->import_memory(root_buffer, rounded_size); + alloc = cuda_device()->import_memory(root_buffer, rounded_size); #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED #endif -} else { - alloc = cpu_device()->import_memory(root_buffer, rounded_size); -} + } else { + alloc = cpu_device()->import_memory(root_buffer, rounded_size); + } -snode_tree_allocs_[tree->id()] = alloc; + snode_tree_allocs_[tree_id] = alloc; -bool all_dense = config->demote_dense_struct_fors; -for (size_t i = 0; i < snode_metas.size(); i++) { - if (snode_metas[i]->type != SNodeType::dense && - snode_metas[i]->type != SNodeType::place && - snode_metas[i]->type != SNodeType::root) { - all_dense = false; - break; + bool all_dense = config->demote_dense_struct_fors; + for (size_t i = 0; i < snode_metas.size(); i++) { + if (snode_metas[i].type != SNodeType::dense && + snode_metas[i].type != SNodeType::place && + snode_metas[i].type != SNodeType::root) { + all_dense = false; + break; + } } -} -runtime_jit->call( - "runtime_initialize_snodes", - llvm_runtime_, - root_size, - root_id, - (int)snode_metas.size(), - root_id, - rounded_size, - root_buffer, - all_dense); - -for (size_t i = 0; i < snode_metas.size(); i++) { - if (is_gc_able(snode_metas[i]->type)) { - const auto snode_id = snode_metas[i].id; - std::size_t node_size; - auto element_size = snode_metas[i].cell_size_bytes; - if (snode_metas[i].type == SNodeType::pointer) { - // pointer. Allocators are for single elements - node_size = element_size; - } else { - // dynamic. Allocators are for the chunks - node_size = sizeof(void *) + element_size * snode_metas[i].chunk_size; + runtime_jit->call( + "runtime_initialize_snodes", llvm_runtime_, root_size, root_id, + (int)snode_metas.size(), tree_id, rounded_size, root_buffer, all_dense); + + for (size_t i = 0; i < snode_metas.size(); i++) { + if (is_gc_able(snode_metas[i].type)) { + const auto snode_id = snode_metas[i].id; + std::size_t node_size; + auto element_size = snode_metas[i].cell_size_bytes; + if (snode_metas[i].type == SNodeType::pointer) { + // pointer. Allocators are for single elements + node_size = element_size; + } else { + // dynamic. Allocators are for the chunks + node_size = sizeof(void *) + element_size * snode_metas[i].chunk_size; + } + TI_TRACE("Initializing allocator for snode {} (node size {})", snode_id, + node_size); + auto rt = llvm_runtime_; + runtime_jit->call( + "runtime_NodeAllocator_initialize", rt, snode_id, node_size); + TI_TRACE("Allocating ambient element for snode {} (node size {})", + snode_id, node_size); + runtime_jit->call("runtime_allocate_ambient", rt, snode_id, + node_size); } - TI_TRACE("Initializing allocator for snode {} (node size {})", snode_id, - node_size); - auto rt = llvm_runtime_; - runtime_jit->call( - "runtime_NodeAllocator_initialize", rt, snode_id, node_size); - TI_TRACE("Allocating ambient element for snode {} (node size {})", snode_id, - node_size); - runtime_jit->call("runtime_allocate_ambient", rt, snode_id, - node_size); } } -} std::unique_ptr LlvmProgramImpl::compile_snode_tree_types_impl( SNodeTree *tree) { @@ -288,10 +279,9 @@ void LlvmProgramImpl::compile_snode_tree_types(SNodeTree *tree) { static LlvmOfflineCache::FieldCacheData construct_filed_cache_data( const SNodeTree &tree, const StructCompiler &struct_compiler) { - TI_ASSERT(tree.id == tree.root()->id); - LlvmOfflineCache::FieldCacheData ret; - ret.tree_id = tree.id; + ret.tree_id = tree.id(); + ret.root_id = tree.root()->id; ret.root_size = struct_compiler.root_size; const auto &snodes = struct_compiler.snodes; From cf72ff8c7bd7eab6203b51bdb0107d7a69453d73 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Wed, 8 Jun 2022 16:06:02 +0800 Subject: [PATCH 3/7] [aot] [llvm] LLVM AOT Field #1: Adjust serialization/deserialization logics for FieldCacheData --- taichi/llvm/llvm_offline_cache.cpp | 14 +++++++++++++ taichi/llvm/llvm_offline_cache.h | 32 ++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/taichi/llvm/llvm_offline_cache.cpp b/taichi/llvm/llvm_offline_cache.cpp index 542f02aebc2c7..92994fe9aa2f0 100644 --- a/taichi/llvm/llvm_offline_cache.cpp +++ b/taichi/llvm/llvm_offline_cache.cpp @@ -51,6 +51,20 @@ LlvmOfflineCacheFileReader::LlvmOfflineCacheFileReader( : path_(path), data_(std::move(data)), format_(format) { } +bool LlvmOfflineCacheFileReader::get_field_cache( + LlvmOfflineCache::FieldCacheData &res, + int snode_tree_id) { + auto itr = data_.fields.find(snode_tree_id); + if (itr == data_.fields.end()) { + TI_DEBUG("Cannot find field with snode_tree_id={}", snode_tree_id); + return false; + } + + const auto &loaded_field_cache = itr->second; + res = loaded_field_cache; // copy assign + return true; +} + bool LlvmOfflineCacheFileReader::get_kernel_cache( LlvmOfflineCache::KernelCacheData &res, const std::string &key, diff --git a/taichi/llvm/llvm_offline_cache.h b/taichi/llvm/llvm_offline_cache.h index 91386c2bffb31..1193365fe30e3 100644 --- a/taichi/llvm/llvm_offline_cache.h +++ b/taichi/llvm/llvm_offline_cache.h @@ -59,10 +59,31 @@ struct LlvmOfflineCache { TI_IO_DEF(tree_id, root_size, snode_metas); - // TODO(zhanlue) - // Serialize/Deserialize the llvm::Module from StructCompiler - // At runtime, make sure loaded Field-Modules and Kernel-Modules are linked - // altogether. + // TODO(zhanlue): refactor llvm::Modules + // + // struct_module will eventually get cloned into each kernel_module, + // so there's no need to serialize it here. + // + // We have three different types of llvm::Module + // 1. runtime_module: contains runtime functions. + // 2. struct_module: contains compiled SNodeTree in llvm::Type. + // 3. kernel_modules: contains compiled kernel codes. + // + // The way those modules work rely on a recursive clone mechanism: + // runtime_module = load("runtime.bc") + // struct_module = clone(runtime_module) + compiled-SNodeTree + // kernel_module = clone(struct_module) + compiled-Kernel + // + // As a result, every kernel_module contains a copy of struct_module + + // runtime_module. + // + // This recursive clone mechanism is super fragile, + // which potentially causes inconsistency between modules if not handled + // properly. + // + // Let's turn to use llvm::link to bind the modules, + // and make runtime_module, struct_module, kernel_module independent of each + // other }; // TODO(zhanlue): we need a better identifier for each FieldCacheData @@ -83,6 +104,9 @@ class LlvmOfflineCacheFileReader { const std::string &key, llvm::LLVMContext &llvm_ctx); + bool get_field_cache(LlvmOfflineCache::FieldCacheData &res, + int snode_tree_id); + static std::unique_ptr make( const std::string &path, LlvmOfflineCache::Format format = LlvmOfflineCache::Format::LL); From dc0e12b31a1e8ac6f496823481236a0e470a0f93 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Thu, 9 Jun 2022 15:59:51 +0800 Subject: [PATCH 4/7] [llvm] [aot] Added Field support for LLVM AOT --- taichi/aot/module_builder.h | 8 ++++ taichi/ir/snode.cpp | 2 +- taichi/ir/snode.h | 2 +- taichi/llvm/llvm_aot_module_builder.cpp | 34 ++++++++++++++ taichi/llvm/llvm_aot_module_builder.h | 8 ++++ taichi/llvm/llvm_aot_module_loader.cpp | 50 ++++++++++++++++++++ taichi/llvm/llvm_aot_module_loader.h | 10 ++++ taichi/llvm/llvm_program.cpp | 62 +++++++++++++++---------- taichi/llvm/llvm_program.h | 23 ++++++--- taichi/program/program.cpp | 4 +- 10 files changed, 169 insertions(+), 34 deletions(-) diff --git a/taichi/aot/module_builder.h b/taichi/aot/module_builder.h index 02cdb9a83ded4..b7da8d76cb6d0 100644 --- a/taichi/aot/module_builder.h +++ b/taichi/aot/module_builder.h @@ -40,6 +40,13 @@ class AotModuleBuilder { void add_graph(const std::string &name, const aot::CompiledGraph &graph); + void set_program(Program *prog) { + prog_ = prog; + } + Program *get_mutable_program() { + return prog_; + } + protected: /** * Intended to be overriden by each backend's implementation. @@ -81,6 +88,7 @@ class AotModuleBuilder { private: std::unordered_map graphs_; + Program *prog_ = nullptr; }; } // namespace lang diff --git a/taichi/ir/snode.cpp b/taichi/ir/snode.cpp index 1a583cda431b5..f36511cb27b5f 100644 --- a/taichi/ir/snode.cpp +++ b/taichi/ir/snode.cpp @@ -326,7 +326,7 @@ void SNode::set_snode_tree_id(int id) { snode_tree_id_ = id; } -int SNode::get_snode_tree_id() { +int SNode::get_snode_tree_id() const { return snode_tree_id_; } diff --git a/taichi/ir/snode.h b/taichi/ir/snode.h index 8a21721c2a7bc..da7560501d97f 100644 --- a/taichi/ir/snode.h +++ b/taichi/ir/snode.h @@ -354,7 +354,7 @@ class SNode { void set_snode_tree_id(int id); - int get_snode_tree_id(); + int get_snode_tree_id() const; static void reset_counter() { counter = 0; diff --git a/taichi/llvm/llvm_aot_module_builder.cpp b/taichi/llvm/llvm_aot_module_builder.cpp index d23ee5c47c564..910465fb43fb0 100644 --- a/taichi/llvm/llvm_aot_module_builder.cpp +++ b/taichi/llvm/llvm_aot_module_builder.cpp @@ -2,6 +2,7 @@ #include #include "taichi/llvm/launch_arg_info.h" +#include "taichi/llvm/llvm_program.h" namespace taichi { namespace lang { @@ -34,5 +35,38 @@ void LlvmAotModuleBuilder::add_per_backend(const std::string &identifier, cache_.kernels[identifier] = std::move(kcache); } +void LlvmAotModuleBuilder::add_field_per_backend(const std::string &identifier, + const SNode *rep_snode, + bool is_scalar, + DataType dt, + std::vector shape, + int row_num, + int column_num) { + // Field refers to a leaf node(Place SNode) in a SNodeTree. + // It makes no sense to just serialize the leaf node or its corresponding + // branch. Instead, the minimal unit we have to serialize is the entire + // SNodeTree. Note that SNodeTree's uses snode_tree_id as its identifier, + // rather than the field's name. (multiple fields may end up referring to the + // same SNodeTree) + + // 1. Find snode_tree_id + int snode_tree_id = rep_snode->get_snode_tree_id(); + + // 2. Fetch Cache from the Program + // Kernel compilation is not allowed until all the Fields are finalized, + // so we finished SNodeTree compilation during AOTModuleBuilder construction. + // + // By the time "add_field_per_backend()" is called, + // SNodeTrees should have already been finalized, + // with compiled info stored in LlvmProgramImpl::cache_data_. + const LlvmProgramImpl *prog = + this->get_mutable_program()->get_llvm_program_impl(); + LlvmOfflineCache::FieldCacheData field_cache = + prog->get_cached_field(snode_tree_id); + + // 3. Update AOT Cache + cache_.fields[snode_tree_id] = std::move(field_cache); +} + } // namespace lang } // namespace taichi diff --git a/taichi/llvm/llvm_aot_module_builder.h b/taichi/llvm/llvm_aot_module_builder.h index b88133a761783..a5fa2558b4a16 100644 --- a/taichi/llvm/llvm_aot_module_builder.h +++ b/taichi/llvm/llvm_aot_module_builder.h @@ -16,6 +16,14 @@ class LlvmAotModuleBuilder : public AotModuleBuilder { void add_per_backend(const std::string &identifier, Kernel *kernel) override; virtual CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) = 0; + void add_field_per_backend(const std::string &identifier, + const SNode *rep_snode, + bool is_scalar, + DataType dt, + std::vector shape, + int row_num, + int column_num) override; + private: mutable LlvmOfflineCache cache_; }; diff --git a/taichi/llvm/llvm_aot_module_loader.cpp b/taichi/llvm/llvm_aot_module_loader.cpp index 5d725927388d7..a7ffa168cf117 100644 --- a/taichi/llvm/llvm_aot_module_loader.cpp +++ b/taichi/llvm/llvm_aot_module_loader.cpp @@ -17,6 +17,24 @@ class KernelImpl : public aot::Kernel { FunctionType fn_; }; +class FieldImpl : public aot::Field { + public: + explicit FieldImpl(const LlvmOfflineCache::FieldCacheData &field) + : field_(field) { + } + + explicit FieldImpl(LlvmOfflineCache::FieldCacheData &&field) + : field_(std::move(field)) { + } + + LlvmOfflineCache::FieldCacheData get_field() const { + return field_; + } + + private: + LlvmOfflineCache::FieldCacheData field_; +}; + } // namespace LlvmOfflineCache::KernelCacheData LlvmAotModule::load_kernel_from_cache( @@ -37,5 +55,37 @@ std::unique_ptr LlvmAotModule::make_new_kernel( return std::make_unique(fn); } +std::unique_ptr LlvmAotModule::make_new_field( + const std::string &name) { + // Check if "name" represents snode_tree_id. + // Avoid using std::atoi due to its poor error handling. + char *end; + int snode_tree_id = static_cast(strtol(name.c_str(), &end, 10 /*base*/)); + + TI_ASSERT(end != name.c_str()); + TI_ASSERT(*end == '\0'); + + // Load FieldCache + LlvmOfflineCache::FieldCacheData loaded; + auto ok = cache_reader_->get_field_cache(loaded, snode_tree_id); + TI_ERROR_IF(!ok, "Failed to load field with id={}", snode_tree_id); + + return std::make_unique(std::move(loaded)); +} + +void finalize_aot_field(aot::Module *aot_module, + aot::Field *aot_field, + uint64 *result_buffer) { + auto *llvm_aot_module = dynamic_cast(aot_module); + auto *aot_field_impl = dynamic_cast(aot_field); + + TI_ASSERT(llvm_aot_module != nullptr); + TI_ASSERT(aot_field_impl != nullptr); + + auto *llvm_prog = llvm_aot_module->get_program(); + const auto &field_cache = aot_field_impl->get_field(); + llvm_prog->initialize_llvm_runtime_snodes(field_cache, result_buffer); +} + } // namespace lang } // namespace taichi diff --git a/taichi/llvm/llvm_aot_module_loader.h b/taichi/llvm/llvm_aot_module_loader.h index b5e8f527cea67..21e2be11ee221 100644 --- a/taichi/llvm/llvm_aot_module_loader.h +++ b/taichi/llvm/llvm_aot_module_loader.h @@ -6,6 +6,10 @@ namespace taichi { namespace lang { +TI_DLL_EXPORT void finalize_aot_field(aot::Module *aot_module, + aot::Field *aot_field, + uint64 *result_buffer); + class LlvmAotModule : public aot::Module { public: explicit LlvmAotModule(const std::string &module_path, @@ -27,6 +31,10 @@ class LlvmAotModule : public aot::Module { return 0; } + LlvmProgramImpl *const get_program() { + return program_; + } + protected: virtual FunctionType convert_module_to_function( const std::string &name, @@ -38,6 +46,8 @@ class LlvmAotModule : public aot::Module { std::unique_ptr make_new_kernel( const std::string &name) override; + std::unique_ptr make_new_field(const std::string &name) override; + LlvmProgramImpl *const program_{nullptr}; std::unique_ptr cache_reader_{nullptr}; }; diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index eea60dad165f7..30adf5f9ccf79 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -273,37 +273,22 @@ std::unique_ptr LlvmProgramImpl::compile_snode_tree_types_impl( } void LlvmProgramImpl::compile_snode_tree_types(SNodeTree *tree) { - compile_snode_tree_types_impl(tree); -} - -static LlvmOfflineCache::FieldCacheData construct_filed_cache_data( - const SNodeTree &tree, - const StructCompiler &struct_compiler) { - LlvmOfflineCache::FieldCacheData ret; - ret.tree_id = tree.id(); - ret.root_id = tree.root()->id; - ret.root_size = struct_compiler.root_size; - - const auto &snodes = struct_compiler.snodes; - for (size_t i = 0; i < snodes.size(); i++) { - LlvmOfflineCache::FieldCacheData::SNodeCacheData snode_cache_data; - snode_cache_data.id = snodes[i]->id; - snode_cache_data.type = snodes[i]->type; - snode_cache_data.cell_size_bytes = snodes[i]->cell_size_bytes; - snode_cache_data.chunk_size = snodes[i]->chunk_size; - - ret.snode_metas.emplace_back(std::move(snode_cache_data)); - } + auto struct_compiler = compile_snode_tree_types_impl(tree); + int snode_tree_id = tree->id(); + int root_id = tree->root()->id; - return ret; + // Add compiled result to Cache + cache_field(snode_tree_id, root_id, *struct_compiler); } void LlvmProgramImpl::materialize_snode_tree(SNodeTree *tree, uint64 *result_buffer) { - auto struct_compiler = compile_snode_tree_types_impl(tree); + compile_snode_tree_types(tree); + int snode_tree_id = tree->id(); - auto field_cache_data = construct_filed_cache_data(*tree, *struct_compiler); - initialize_llvm_runtime_snodes(field_cache_data, result_buffer); + TI_ASSERT(cache_data_.fields.find(snode_tree_id) != cache_data_.fields.end()); + initialize_llvm_runtime_snodes(cache_data_.fields.at(snode_tree_id), + result_buffer); } uint64 LlvmProgramImpl::fetch_result_uint64(int i, uint64 *result_buffer) { @@ -701,6 +686,33 @@ void LlvmProgramImpl::cache_kernel( kernel_cache.offloaded_task_list = std::move(offloaded_task_list); } +void LlvmProgramImpl::cache_field(int snode_tree_id, + int root_id, + const StructCompiler &struct_compiler) { + if (cache_data_.fields.find(snode_tree_id) != cache_data_.fields.end()) { + // [TODO] check and update the Cache, instead of simply return. + return; + } + + LlvmOfflineCache::FieldCacheData ret; + ret.tree_id = snode_tree_id; + ret.root_id = root_id; + ret.root_size = struct_compiler.root_size; + + const auto &snodes = struct_compiler.snodes; + for (size_t i = 0; i < snodes.size(); i++) { + LlvmOfflineCache::FieldCacheData::SNodeCacheData snode_cache_data; + snode_cache_data.id = snodes[i]->id; + snode_cache_data.type = snodes[i]->type; + snode_cache_data.cell_size_bytes = snodes[i]->cell_size_bytes; + snode_cache_data.chunk_size = snodes[i]->chunk_size; + + ret.snode_metas.emplace_back(std::move(snode_cache_data)); + } + + cache_data_.fields[snode_tree_id] = std::move(ret); +} + void LlvmProgramImpl::dump_cache_data_to_disk() { if (config->offline_cache && !cache_data_.kernels.empty()) { LlvmOfflineCacheFileWriter writer{}; diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h index 69378ee660bf1..2eec64dd8e7bd 100644 --- a/taichi/llvm/llvm_program.h +++ b/taichi/llvm/llvm_program.h @@ -118,10 +118,27 @@ class LlvmProgramImpl : public ProgramImpl { std::vector &&offloaded_task_list); + void cache_field(int snode_tree_id, + int root_id, + const StructCompiler &struct_compiler); + + LlvmOfflineCache::FieldCacheData get_cached_field(int snode_tree_id) const { + TI_ASSERT(cache_data_.fields.find(snode_tree_id) != + cache_data_.fields.end()); + return cache_data_.fields.at(snode_tree_id); + } + Device *get_compute_device() override { return device_.get(); } + /** + * Initializes the SNodes for LLVM based backends. + */ + void initialize_llvm_runtime_snodes( + const LlvmOfflineCache::FieldCacheData &field_cache_data, + uint64 *result_buffer); + private: std::unique_ptr clone_struct_compiler_initial_context( bool has_multiple_snode_trees, @@ -129,12 +146,6 @@ class LlvmProgramImpl : public ProgramImpl { std::unique_ptr compile_snode_tree_types_impl( SNodeTree *tree); - /** - * Initializes the SNodes for LLVM based backends. - */ - void initialize_llvm_runtime_snodes( - const LlvmOfflineCache::FieldCacheData &field_cache_data, - uint64 *result_buffer); uint64 fetch_result_uint64(int i, uint64 *result_buffer); diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 994fcbfbdf351..f55ebab88b096 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -610,7 +610,9 @@ std::unique_ptr Program::make_aot_module_builder(Arch arch) { } if (arch_uses_llvm(config.arch) || config.arch == Arch::metal || config.arch == Arch::vulkan || config.arch == Arch::opengl) { - return program_impl_->make_aot_module_builder(); + auto aot_builder = program_impl_->make_aot_module_builder(); + aot_builder->set_program(this); + return aot_builder; } return nullptr; } From 417f3fb3c60122f5f3878760fd66e86fedfc265a Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Thu, 9 Jun 2022 18:00:35 +0800 Subject: [PATCH 5/7] [aot] [llvm] LLVM AOT Field #2: Updated LLVM AOTModuleLoader & AOTModuleBuilder to support Fields --- taichi/backends/cpu/aot_module_loader_impl.cpp | 5 ----- taichi/backends/cuda/aot_module_loader_impl.cpp | 5 ----- taichi/llvm/llvm_aot_module_loader.cpp | 7 ++++++- taichi/llvm/llvm_aot_module_loader.h | 11 +++++++++++ taichi/llvm/llvm_offline_cache.h | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/taichi/backends/cpu/aot_module_loader_impl.cpp b/taichi/backends/cpu/aot_module_loader_impl.cpp index e2ff3b2ecf0f6..16c297dced325 100644 --- a/taichi/backends/cpu/aot_module_loader_impl.cpp +++ b/taichi/backends/cpu/aot_module_loader_impl.cpp @@ -44,11 +44,6 @@ class AotModuleImpl : public LlvmAotModule { TI_NOT_IMPLEMENTED; return nullptr; } - - std::unique_ptr make_new_field(const std::string &name) override { - TI_NOT_IMPLEMENTED; - return nullptr; - } }; } // namespace diff --git a/taichi/backends/cuda/aot_module_loader_impl.cpp b/taichi/backends/cuda/aot_module_loader_impl.cpp index b08efdc9632da..69bf52d749772 100644 --- a/taichi/backends/cuda/aot_module_loader_impl.cpp +++ b/taichi/backends/cuda/aot_module_loader_impl.cpp @@ -44,11 +44,6 @@ class AotModuleImpl : public LlvmAotModule { TI_NOT_IMPLEMENTED; return nullptr; } - - std::unique_ptr make_new_field(const std::string &name) override { - TI_NOT_IMPLEMENTED; - return nullptr; - } }; } // namespace diff --git a/taichi/llvm/llvm_aot_module_loader.cpp b/taichi/llvm/llvm_aot_module_loader.cpp index a7ffa168cf117..99ca51f665363 100644 --- a/taichi/llvm/llvm_aot_module_loader.cpp +++ b/taichi/llvm/llvm_aot_module_loader.cpp @@ -84,7 +84,12 @@ void finalize_aot_field(aot::Module *aot_module, auto *llvm_prog = llvm_aot_module->get_program(); const auto &field_cache = aot_field_impl->get_field(); - llvm_prog->initialize_llvm_runtime_snodes(field_cache, result_buffer); + + int snode_tree_id = field_cache.tree_id; + if (!llvm_aot_module->is_snode_tree_initialized(snode_tree_id)) { + llvm_prog->initialize_llvm_runtime_snodes(field_cache, result_buffer); + llvm_aot_module->set_initialized_snode_tree(snode_tree_id); + } } } // namespace lang diff --git a/taichi/llvm/llvm_aot_module_loader.h b/taichi/llvm/llvm_aot_module_loader.h index 21e2be11ee221..1e4e093bcfc2c 100644 --- a/taichi/llvm/llvm_aot_module_loader.h +++ b/taichi/llvm/llvm_aot_module_loader.h @@ -35,6 +35,14 @@ class LlvmAotModule : public aot::Module { return program_; } + void set_initialized_snode_tree(int snode_tree_id) { + initialized_snode_tree_ids.insert(snode_tree_id); + } + + bool is_snode_tree_initialized(int snode_tree_id) { + return initialized_snode_tree_ids.count(snode_tree_id); + } + protected: virtual FunctionType convert_module_to_function( const std::string &name, @@ -50,6 +58,9 @@ class LlvmAotModule : public aot::Module { LlvmProgramImpl *const program_{nullptr}; std::unique_ptr cache_reader_{nullptr}; + + // To prevent repeated SNodeTree initialization + std::unordered_set initialized_snode_tree_ids; }; } // namespace lang diff --git a/taichi/llvm/llvm_offline_cache.h b/taichi/llvm/llvm_offline_cache.h index 1193365fe30e3..bd7f7900cfb95 100644 --- a/taichi/llvm/llvm_offline_cache.h +++ b/taichi/llvm/llvm_offline_cache.h @@ -95,7 +95,7 @@ struct LlvmOfflineCache { std::unordered_map kernels; // key = kernel_name - TI_IO_DEF(kernels); + TI_IO_DEF(fields, kernels); }; class LlvmOfflineCacheFileReader { From 1480736b797a02ba6327aaa2f96aa0952977413a Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Fri, 10 Jun 2022 15:34:34 +0800 Subject: [PATCH 6/7] Fixed merge issues --- taichi/llvm/llvm_program.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index 00a5a9935c011..30adf5f9ccf79 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -281,28 +281,6 @@ void LlvmProgramImpl::compile_snode_tree_types(SNodeTree *tree) { cache_field(snode_tree_id, root_id, *struct_compiler); } -static LlvmOfflineCache::FieldCacheData construct_filed_cache_data( - const SNodeTree &tree, - const StructCompiler &struct_compiler) { - LlvmOfflineCache::FieldCacheData ret; - ret.tree_id = tree.id(); - ret.root_id = tree.root()->id; - ret.root_size = struct_compiler.root_size; - - const auto &snodes = struct_compiler.snodes; - for (size_t i = 0; i < snodes.size(); i++) { - LlvmOfflineCache::FieldCacheData::SNodeCacheData snode_cache_data; - snode_cache_data.id = snodes[i]->id; - snode_cache_data.type = snodes[i]->type; - snode_cache_data.cell_size_bytes = snodes[i]->cell_size_bytes; - snode_cache_data.chunk_size = snodes[i]->chunk_size; - - ret.snode_metas.emplace_back(std::move(snode_cache_data)); - } - - return ret; -} - void LlvmProgramImpl::materialize_snode_tree(SNodeTree *tree, uint64 *result_buffer) { compile_snode_tree_types(tree); From 52872e6f6f7cff2c3cd7fc8695a7bf5ad72ebc4c Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Mon, 13 Jun 2022 14:35:07 +0800 Subject: [PATCH 7/7] Stopped abusing Program* --- taichi/aot/module_builder.h | 8 -------- taichi/backends/cpu/aot_module_builder_impl.h | 5 +++++ taichi/backends/cuda/aot_module_builder_impl.h | 5 +++++ taichi/llvm/llvm_aot_module_builder.cpp | 5 ++--- taichi/llvm/llvm_aot_module_builder.h | 4 ++++ taichi/llvm/llvm_program.cpp | 4 ++-- taichi/program/program.cpp | 4 +--- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/taichi/aot/module_builder.h b/taichi/aot/module_builder.h index b7da8d76cb6d0..02cdb9a83ded4 100644 --- a/taichi/aot/module_builder.h +++ b/taichi/aot/module_builder.h @@ -40,13 +40,6 @@ class AotModuleBuilder { void add_graph(const std::string &name, const aot::CompiledGraph &graph); - void set_program(Program *prog) { - prog_ = prog; - } - Program *get_mutable_program() { - return prog_; - } - protected: /** * Intended to be overriden by each backend's implementation. @@ -88,7 +81,6 @@ class AotModuleBuilder { private: std::unordered_map graphs_; - Program *prog_ = nullptr; }; } // namespace lang diff --git a/taichi/backends/cpu/aot_module_builder_impl.h b/taichi/backends/cpu/aot_module_builder_impl.h index 1d81fa41d7c2e..039174aa88503 100644 --- a/taichi/backends/cpu/aot_module_builder_impl.h +++ b/taichi/backends/cpu/aot_module_builder_impl.h @@ -9,6 +9,11 @@ namespace lang { namespace cpu { class AotModuleBuilderImpl : public LlvmAotModuleBuilder { + public: + explicit AotModuleBuilderImpl(LlvmProgramImpl *prog) + : LlvmAotModuleBuilder(prog) { + } + private: CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) override; }; diff --git a/taichi/backends/cuda/aot_module_builder_impl.h b/taichi/backends/cuda/aot_module_builder_impl.h index f0fdc74e14f9c..94ac89380d1e0 100644 --- a/taichi/backends/cuda/aot_module_builder_impl.h +++ b/taichi/backends/cuda/aot_module_builder_impl.h @@ -9,6 +9,11 @@ namespace lang { namespace cuda { class AotModuleBuilderImpl : public LlvmAotModuleBuilder { + public: + explicit AotModuleBuilderImpl(LlvmProgramImpl *prog) + : LlvmAotModuleBuilder(prog) { + } + private: CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) override; }; diff --git a/taichi/llvm/llvm_aot_module_builder.cpp b/taichi/llvm/llvm_aot_module_builder.cpp index 910465fb43fb0..664ee933893c9 100644 --- a/taichi/llvm/llvm_aot_module_builder.cpp +++ b/taichi/llvm/llvm_aot_module_builder.cpp @@ -59,10 +59,9 @@ void LlvmAotModuleBuilder::add_field_per_backend(const std::string &identifier, // By the time "add_field_per_backend()" is called, // SNodeTrees should have already been finalized, // with compiled info stored in LlvmProgramImpl::cache_data_. - const LlvmProgramImpl *prog = - this->get_mutable_program()->get_llvm_program_impl(); + TI_ASSERT(prog_ != nullptr); LlvmOfflineCache::FieldCacheData field_cache = - prog->get_cached_field(snode_tree_id); + prog_->get_cached_field(snode_tree_id); // 3. Update AOT Cache cache_.fields[snode_tree_id] = std::move(field_cache); diff --git a/taichi/llvm/llvm_aot_module_builder.h b/taichi/llvm/llvm_aot_module_builder.h index a5fa2558b4a16..857f237c4a73c 100644 --- a/taichi/llvm/llvm_aot_module_builder.h +++ b/taichi/llvm/llvm_aot_module_builder.h @@ -9,6 +9,9 @@ namespace lang { class LlvmAotModuleBuilder : public AotModuleBuilder { public: + explicit LlvmAotModuleBuilder(LlvmProgramImpl *prog) : prog_(prog) { + } + void dump(const std::string &output_dir, const std::string &filename) const override; @@ -26,6 +29,7 @@ class LlvmAotModuleBuilder : public AotModuleBuilder { private: mutable LlvmOfflineCache cache_; + LlvmProgramImpl *prog_ = nullptr; }; } // namespace lang diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index 30adf5f9ccf79..a805ade265e37 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -350,12 +350,12 @@ void LlvmProgramImpl::print_list_manager_info(void *list_manager, std::unique_ptr LlvmProgramImpl::make_aot_module_builder() { if (config->arch == Arch::x64 || config->arch == Arch::arm64) { - return std::make_unique(); + return std::make_unique(this); } #if defined(TI_WITH_CUDA) if (config->arch == Arch::cuda) { - return std::make_unique(); + return std::make_unique(this); } #endif diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 62562bcc004a1..d94b6b6ddff2b 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -610,9 +610,7 @@ std::unique_ptr Program::make_aot_module_builder(Arch arch) { } if (arch_uses_llvm(config.arch) || config.arch == Arch::metal || config.arch == Arch::vulkan || config.arch == Arch::opengl) { - auto aot_builder = program_impl_->make_aot_module_builder(); - aot_builder->set_program(this); - return aot_builder; + return program_impl_->make_aot_module_builder(); } return nullptr; }