Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load ukernel bitcode as executable_object at the time of lowering to ukernels. #19323

Merged
merged 3 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def iree_amdgpu_bitcode_library(self, name, gpu_arch, srcs, copts=None, out=None
"GPU_ARCH", gpu_arch, quote=False
)
srcs_block = self._convert_srcs_block(srcs)
out_block = self._convert_string_arg_block("OUT", out, quote=False)
out_block = self._convert_string_arg_block("OUT", out, quote=True)
copts_block = self._convert_string_list_block("COPTS", copts, sort=False)

self._converter.body += (
Expand Down
5 changes: 1 addition & 4 deletions compiler/plugins/target/ROCM/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@ iree_compiler_cc_library(
"ROCMTargetUtils.h",
],
deps = [
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_gfx1030",
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_gfx1100",
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_gfx90a",
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_gfx942",
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_bitcode",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils:KnownTargets",
Expand Down
5 changes: 1 addition & 4 deletions compiler/plugins/target/ROCM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,7 @@ iree_cc_library(
iree::compiler::Dialect::HAL::Utils::LLVMLinkerUtils
iree::compiler::PluginAPI
iree::compiler::Utils
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_gfx1030
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_gfx1100
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_gfx90a
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_gfx942
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_bitcode
iree::schemas::amdgpu_executable_def_c_fbs
iree::schemas::executable_debug_info_c_fbs
iree::schemas::hip_executable_def_c_fbs
Expand Down
36 changes: 20 additions & 16 deletions compiler/plugins/target/ROCM/ROCMTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <cstdint>

#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_bitcode.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h"
Expand All @@ -21,6 +22,7 @@
#include "iree/compiler/Dialect/HAL/Utils/ExecutableDebugInfoUtils.h"
#include "iree/compiler/Dialect/HAL/Utils/LLVMLinkerUtils.h"
#include "iree/compiler/PluginAPI/Client.h"
#include "iree/compiler/Utils/EmbeddedDataDirectory.h"
#include "iree/compiler/Utils/FlatbufferUtils.h"
#include "iree/compiler/Utils/ToolUtils.h"
#include "iree/schemas/amdgpu_executable_def_builder.h"
Expand Down Expand Up @@ -206,6 +208,7 @@ static std::string translateModuleToISA(llvm::Module &module,
}
return targetISA;
}

} // namespace

class ROCMTargetBackend final : public TargetBackend {
Expand Down Expand Up @@ -513,20 +516,6 @@ class ROCMTargetBackend final : public TargetBackend {
return failure();
}

// Link module to any enabled ukernels.
StringRef bitcodeDirectory = options.bitcodeDirectory;
StringRef enabledUkernels;
if (auto attr = getConfigStringAttr(targetAttr, "ukernels"))
enabledUkernels = attr->getValue();
if (!enabledUkernels.empty() && enabledUkernels != "none") {
if (failed(linkUkernelBitcodeFiles(
variantOp.getLoc(), llvmModule.get(), enabledUkernels,
targetArch, bitcodeDirectory, llvm::Linker::OverrideFromSrc,
*targetMachine))) {
return failure();
}
}

// Link bitcode (*.bc) object attrs specified by the input program.
// Note that this happens after the command-line files so that the command
// line ones override the symbols coming from the embedded files.
Expand All @@ -548,14 +537,15 @@ class ROCMTargetBackend final : public TargetBackend {
}

// Link module to HIP device library.
if (bitcodeDirectory.empty()) {
if (options.bitcodeDirectory.empty()) {
return variantOp.emitError()
<< "cannot find ROCM bitcode files. Check your installation "
"consistency and in the worst case, set "
"--iree-hip-bc-dir= to a path on your system.";
}
if (failed(linkHIPBitcodeIfNeeded(variantOp.getLoc(), llvmModule.get(),
targetArch, bitcodeDirectory))) {
targetArch,
options.bitcodeDirectory))) {
return failure();
}

Expand Down Expand Up @@ -881,6 +871,7 @@ class HIPTargetDevice final : public TargetDevice {
};

namespace {

struct ROCMSession final
: PluginSession<ROCMSession, ROCMOptions,
PluginActivationPolicy::DefaultActivated> {
Expand Down Expand Up @@ -910,10 +901,23 @@ struct ROCMSession final

} // namespace mlir::iree_compiler::IREE::HAL

// Iterate over ukernel bitcode embedded-data files, and insert them into the
// EmbeddedDataDirectory singleton.
static void addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory() {
using mlir::iree_compiler::EmbeddedDataDirectory;
EmbeddedDataDirectory::withGlobal([](EmbeddedDataDirectory &dir) {
const iree_file_toc_t *toc = iree_uk_amdgpu_bitcode_create();
for (size_t i = 0; i < iree_uk_amdgpu_bitcode_size(); ++i) {
dir.addFile(toc[i].name, llvm::StringRef{toc[i].data, toc[i].size});
}
});
}

// C entry point that registers the ROCM HAL target plugin. Registers the
// ROCMSession plugin under the id "hal_target_rocm", then adds the embedded
// AMDGPU ukernel bitcode files to the global EmbeddedDataDirectory. Always
// returns true.
extern "C" bool iree_register_compiler_plugin_hal_target_rocm(
    mlir::iree_compiler::PluginRegistrar *registrar) {
  using ROCMPluginSession = mlir::iree_compiler::IREE::HAL::ROCMSession;
  registrar->registerPlugin<ROCMPluginSession>("hal_target_rocm");
  addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory();
  return true;
}

Expand Down
41 changes: 0 additions & 41 deletions compiler/plugins/target/ROCM/ROCMTargetUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@

#include "compiler/plugins/target/ROCM/ROCMTargetUtils.h"

#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1030.h"
#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1100.h"
#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx90a.h"
#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx942.h"
#include "iree/compiler/Codegen/Utils/GPUUtils.h"
#include "iree/compiler/Dialect/HAL/Utils/LLVMLinkerUtils.h"
#include "iree/compiler/Utils/ToolUtils.h"
Expand Down Expand Up @@ -185,43 +181,6 @@ LogicalResult linkHIPBitcodeIfNeeded(Location loc, llvm::Module *module,
return linkWithBitcodeFiles(loc, module, bitcodePaths);
}

// Returns the embedded ukernel bitcode table of contents and its entry count
// for `gpuArch`. Recognized architectures are gfx90a, gfx942, gfx1030 and
// gfx1100; any other string yields {nullptr, 0}.
static std::tuple<const iree_file_toc_t *, int>
getUkernelBitcodeTOC(StringRef gpuArch) {
  if (gpuArch == "gfx90a") {
    return {iree_uk_amdgpu_gfx90a_create(), iree_uk_amdgpu_gfx90a_size()};
  }
  if (gpuArch == "gfx942") {
    return {iree_uk_amdgpu_gfx942_create(), iree_uk_amdgpu_gfx942_size()};
  }
  if (gpuArch == "gfx1030") {
    return {iree_uk_amdgpu_gfx1030_create(), iree_uk_amdgpu_gfx1030_size()};
  }
  if (gpuArch == "gfx1100") {
    return {iree_uk_amdgpu_gfx1100_create(), iree_uk_amdgpu_gfx1100_size()};
  }
  return {nullptr, 0};
}

// Links every embedded ukernel bitcode file for `targetChip` into `module`.
// Fails when no embedded table of contents exists for `targetChip`, or as
// soon as linking any single bitcode file fails; succeeds otherwise.
// `enabledUkernelsStr` and `bitcodePath` are accepted but not read here.
LogicalResult linkUkernelBitcodeFiles(Location loc, llvm::Module *module,
                                      StringRef enabledUkernelsStr,
                                      StringRef targetChip,
                                      StringRef bitcodePath,
                                      unsigned linkerFlags,
                                      llvm::TargetMachine &targetMachine) {
  auto [tocEntries, entryCount] = getUkernelBitcodeTOC(targetChip);
  if (tocEntries == nullptr) {
    return failure();
  }

  llvm::Linker linker(*module);
  for (int index = 0; index < entryCount; ++index) {
    const auto &entry = tocEntries[index];
    llvm::StringRef bitcode(entry.data, entry.size);
    auto linked = linkBitcodeFile(loc, linker, linkerFlags, entry.name,
                                  bitcode, targetMachine,
                                  module->getContext());
    if (failed(linked)) {
      return failure();
    }
  }

  return success();
}

// Link object file using lld linker to generate code object
// Inspiration from this section comes from LLVM-PROJECT-MLIR by
// ROCmSoftwarePlatform
Expand Down
21 changes: 11 additions & 10 deletions compiler/plugins/target/ROCM/builtins/ukernel/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,20 @@ argmax_types = [
"iree_uk_amdgpu_argmax_%s.c" % type,
"common.h",
],
out = "iree_uk_amdgpu_argmax_%s.%s.bc" % (type, gpu_arch),
gpu_arch = gpu_arch,
) for type in argmax_types for gpu_arch in gpu_archs]

argmax_bc_files = {gpu_arch: [
":iree_uk_amdgpu_argmax_%s.c.%s.bc" % (type, gpu_arch)
argmax_bc_files = [
":iree_uk_amdgpu_argmax_%s.%s.bc" % (type, gpu_arch)
for type in argmax_types
] for gpu_arch in gpu_archs}
for gpu_arch in gpu_archs
]

[iree_c_embed_data(
name = "iree_uk_amdgpu_%s" % gpu_arch,
srcs = argmax_bc_files[gpu_arch],
c_file_output = "iree_uk_amdgpu_%s.c" % gpu_arch,
iree_c_embed_data(
name = "iree_uk_amdgpu_bitcode",
srcs = argmax_bc_files,
c_file_output = "iree_uk_amdgpu_bitcode.c",
flatten = True,
h_file_output = "iree_uk_amdgpu_%s.h" % gpu_arch,
identifier = "iree_uk_amdgpu_%s" % gpu_arch,
) for gpu_arch in gpu_archs]
h_file_output = "iree_uk_amdgpu_bitcode.h",
)
Loading
Loading