Skip to content

Commit

Permalink
[HIP] Move HIP Linking Logic into HIP ToolChain
Browse files Browse the repository at this point in the history
This patch is a follow-up to https://reviews.llvm.org/D78759.

Extract the HIP linker-script logic from the generic GNU linker
and move it into the HIP ToolChain. Update the OffloadingActionBuilder
link-action hooks so that device linking and host linking actions
are applied separately. Embed the device images and define the
required symbols using MC directives.

Reviewers: JonChesterfield, yaxunl

Subscribers: tra, echristo, jdoerfert, msearles, scchan

Differential Revision: https://reviews.llvm.org/D81963
  • Loading branch information
aaronenyeshi committed Jun 22, 2020
1 parent 315bd96 commit 77df5a8
Show file tree
Hide file tree
Showing 13 changed files with 304 additions and 233 deletions.
66 changes: 54 additions & 12 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2325,8 +2325,11 @@ class OffloadingActionBuilder final {
/// Append top level actions generated by the builder.
/// The default implementation is a no-op and leaves \p AL untouched.
virtual void appendTopLevelActions(ActionList &AL) {}

/// Append linker actions generated by the builder.
/// The default implementation is a no-op and leaves \p AL untouched.
virtual void appendLinkActions(ActionList &AL) {}
/// Append linker device actions generated by the builder.
/// The default implementation is a no-op and leaves \p AL untouched.
virtual void appendLinkDeviceActions(ActionList &AL) {}

/// Append linker host action generated by the builder.
/// \returns the action selected by the builder for the host link step; this
/// default implementation selects none and returns nullptr.
virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }

/// Append device link dependences generated by the builder to \p DA.
/// The default implementation is a no-op and adds nothing.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
Expand Down Expand Up @@ -2796,17 +2799,45 @@ class OffloadingActionBuilder final {
: ABRT_Success;
}

void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
void appendLinkDeviceActions(ActionList &AL) override {
// Appends one wrapped device link action per entry of DeviceLinkerInputs to
// AL, then appends one more wrapped link action that takes AL as input and
// produces a host object (types::TY_Object).
// Nothing was queued for device linking: leave AL untouched.
if (DeviceLinkerInputs.size() == 0)
return;

assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
"Linker inputs and GPU arch list sizes do not match.");

// Append a new link action for each device.
// I indexes GpuArchList in step with the iteration over DeviceLinkerInputs.
unsigned I = 0;
for (auto &LI : DeviceLinkerInputs) {
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
// NOTE(review): DA is neither a parameter nor a local of this method; this
// call looks like a leftover from the appendLinkDependences body -- confirm.
DA.add(*DeviceLinkAction, *ToolChains[0],
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
DeviceLinkAction->getType()));
++I;
}
// The inputs have been consumed; reset them so they are not linked twice.
DeviceLinkerInputs.clear();

// Create a host object from all the device images by embedding them
// in a fat binary.
// AL is passed whole as the input list, so every action currently in AL
// (including any that were present before this call) feeds this link step.
OffloadAction::DeviceDependences DDeps;
auto *TopDeviceLinkAction =
C.MakeAction<LinkJobAction>(AL, types::TY_Object);
// nullptr is passed where the per-arch calls above pass an arch string;
// presumably this means "no bound arch" -- confirm against
// DeviceDependences::add.
DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
nullptr, AssociatedOffloadKind);

// Offload the host object to the host linker.
// It is pushed last, so it ends up as AL.back().
AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
}

/// Select the action to hand to the host link step: the last action in
/// \p AL.
/// \returns AL.back(), or nullptr when \p AL is empty (the same "no host
/// action" value the base-class default returns).
Action* appendLinkHostActions(ActionList &AL) override {
  // back() on an empty list is undefined behaviour, so guard it explicitly
  // instead of relying on every caller to check first.
  return AL.empty() ? nullptr : AL.back();
}

// Adds nothing to DA: the override body is empty.
// NOTE(review): presumably device link results are delivered through
// appendLinkDeviceActions instead -- confirm.
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};

/// OpenMP action builder. The host bitcode is passed to the device frontend
Expand Down Expand Up @@ -2934,7 +2965,7 @@ class OffloadingActionBuilder final {
OpenMPDeviceActions.clear();
}

void appendLinkActions(ActionList &AL) override {
void appendLinkDeviceActions(ActionList &AL) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");

Expand All @@ -2953,6 +2984,14 @@ class OffloadingActionBuilder final {
DeviceLinkerInputs.clear();
}

/// Turn the device link results in \p AL into a single host object action.
/// \returns the assemble action that produces the object (types::TY_Object).
Action* appendLinkHostActions(ActionList &AL) override {
  // Pipeline: device link results -> wrapper bitcode -> assembly -> object.
  // The resulting object will be added to the host link command.
  auto *WrapperBitcode =
      C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
  auto *WrapperAsm =
      C.MakeAction<BackendJobAction>(WrapperBitcode, types::TY_PP_Asm);
  auto *WrapperObject =
      C.MakeAction<AssembleJobAction>(WrapperAsm, types::TY_Object);
  return WrapperObject;
}

// Adds nothing to DA: the override body is empty.
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}

bool initialize() override {
Expand Down Expand Up @@ -3185,17 +3224,20 @@ class OffloadingActionBuilder final {
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkActions(DeviceAL);
SB->appendLinkDeviceActions(DeviceAL);
}

if (DeviceAL.empty())
return nullptr;

// Create wrapper bitcode from the result of device link actions and compile
// it to an object which will be added to the host link command.
auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
// Let builders add host linking actions. The result starts out as nullptr so
// the returned pointer is well-defined even if no valid builder is visited.
// When several valid builders run, the value from the last one is returned.
Action *HostAction = nullptr;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
  if (!SB->isValid())
    continue;
  HostAction = SB->appendLinkHostActions(DeviceAL);
}
return HostAction;
}

/// Processes the host linker action. This currently consists of replacing it
Expand Down
117 changes: 3 additions & 114 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,12 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

for (const auto &II : Inputs) {
// If the current tool chain refers to an OpenMP or HIP offloading host, we
// should ignore inputs that refer to OpenMP or HIP offloading devices -
// If the current tool chain refers to an OpenMP offloading host, we
// should ignore inputs that refer to OpenMP offloading devices -
// they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
IA->isDeviceOffloading(Action::OFK_OpenMP)) ||
(JA.isHostOffloading(Action::OFK_HIP) &&
IA->isDeviceOffloading(Action::OFK_HIP)))
IA->isDeviceOffloading(Action::OFK_OpenMP)))
continue;

if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
Expand Down Expand Up @@ -1298,115 +1296,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
}
}

/// Add HIP linker script arguments at the end of the argument list so that
/// the fat binary is built by embedding the device images into the host. The
/// linker script also defines a symbol required by the code generation so that
/// the image can be retrieved at runtime. This should be used only in tool
/// chains that support linker scripts.
///
/// Does nothing unless \p JA is a HIP host-offloading job and \p Inputs
/// contains at least one HIP device link action. Otherwise it appends
/// "-T <script>" to \p CmdArgs, queues the fat-binary bundling command, and
/// (except on a -### dry run) writes the linker script file.
void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
ArgStringList &CmdArgs, const JobAction &JA,
const Tool &T) {

// If this is not a HIP host toolchain, we don't need to do anything.
if (!JA.isHostOffloading(Action::OFK_HIP))
return;

// Keep only the inputs that come from HIP device link actions.
InputInfoList DeviceInputs;
for (const auto &II : Inputs) {
const Action *A = II.getAction();
// Is this a device linking action?
if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
DeviceInputs.push_back(II);
}
}

// No device images to embed: leave CmdArgs untouched.
if (DeviceInputs.empty())
return;

// Create temporary linker script. Keep it if save-temps is enabled.
// Both file names below are derived from the file-name part of the output.
const char *LKS;
std::string Name =
std::string(llvm::sys::path::filename(Output.getFilename()));
if (C.getDriver().isSaveTempsEnabled()) {
LKS = C.getArgs().MakeArgString(Name + ".lk");
} else {
auto TmpName = C.getDriver().GetTemporaryPath(Name, "lk");
LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName));
}

// Add linker script option to the command.
CmdArgs.push_back("-T");
CmdArgs.push_back(LKS);

// Create a buffer to write the contents of the linker script.
std::string LksBuffer;
llvm::raw_string_ostream LksStream(LksBuffer);

// Get the HIP offload tool chain.
auto *HIPTC = static_cast<const toolchains::HIPToolChain *>(
C.getSingleOffloadToolChain<Action::OFK_HIP>());
assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&
"Wrong platform");
// HIPTC is only read by the assert above; the void cast marks it as used.
(void)HIPTC;

// Pick the fat binary path the same way as the script path above: a kept
// "<Name>.hipfb" under save-temps, otherwise a registered temporary file.
const char *BundleFile;
if (C.getDriver().isSaveTempsEnabled()) {
BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
} else {
auto TmpName = C.getDriver().GetTemporaryPath(Name, "hipfb");
BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
// Presumably queues the command that bundles DeviceInputs into BundleFile --
// confirm against AMDGCN::constructHIPFatbinCommand.
AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T);

// Add commands to embed target binaries. We ensure that each section and
// image is 16-byte aligned. This is not mandatory, but increases the
// likelihood of data to be aligned with a cache block in several main host
// machines.
LksStream << "/*\n";
LksStream << " HIP Offload Linker Script\n";
LksStream << " *** Automatically generated by Clang ***\n";
LksStream << "*/\n";
LksStream << "TARGET(binary)\n";
LksStream << "INPUT(" << BundleFile << ")\n";
LksStream << "SECTIONS\n";
LksStream << "{\n";
LksStream << " .hip_fatbin :\n";
LksStream << " ALIGN(0x10)\n";
LksStream << " {\n";
LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n";
LksStream << " " << BundleFile << "\n";
LksStream << " }\n";
LksStream << " /DISCARD/ :\n";
LksStream << " {\n";
LksStream << " * ( __CLANG_OFFLOAD_BUNDLE__* )\n";
LksStream << " }\n";
LksStream << "}\n";
LksStream << "INSERT BEFORE .data\n";
// Flush so LksBuffer holds the complete script before it is read below.
LksStream.flush();

// Dump the contents of the linker script if the user requested that. We
// support this option to enable testing of behavior with -###.
if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
llvm::errs() << LksBuffer;

// If this is a dry run, do not create the linker script file.
// The "-T" arguments were already appended above and stay in CmdArgs.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;

// Open script file and write the contents.
std::error_code EC;
llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);

// Report the failure through the driver diagnostics and give up on writing.
if (EC) {
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return;
}

Lksf << LksBuffer;
}

SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,
Expand Down
6 changes: 0 additions & 6 deletions clang/lib/Driver/ToolChains/CommonArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
llvm::opt::ArgStringList &CmdArgs,
const llvm::opt::ArgList &Args);

/// Add HIP linker script arguments to \p CmdArgs so that the device images
/// are embedded into the host as a fat binary. Only meant for tool chains
/// that support linker scripts.
void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA,
const Tool &T);

const char *SplitDebugName(const llvm::opt::ArgList &Args,
const InputInfo &Input, const InputInfo &Output);

Expand Down
4 changes: 0 additions & 4 deletions clang/lib/Driver/ToolChains/Gnu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,10 +625,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}

// Add HIP offloading linker script args if required.
AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
*this);

Args.AddAllArgs(CmdArgs, options::OPT_T);

const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
Expand Down
74 changes: 74 additions & 0 deletions clang/lib/Driver/ToolChains/HIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,87 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
}

/// Add Generated HIP Object File which has device images embedded into the
/// host to the argument list for linking. Using MC directives, embed the
/// device code and also define symbols required by the code generation so that
/// the image can be retrieved at runtime.
///
/// Steps: queue the fat-binary bundling command for \p Inputs, generate an MC
/// input file that pulls the bundle in with `.incbin` under the
/// `__hip_fatbin` symbol, and queue an llvm-mc command that assembles that
/// file into \p Output.
///
/// \param C      Compilation that receives the queued commands and temp files.
/// \param Output Object file to produce; its stem names the intermediate
///               ".mcin" and ".hipfb" files.
/// \param Inputs Device images to bundle.
/// \param Args   Driver arguments, forwarded to the bundling step.
/// \param JA     Job action the queued commands are attributed to.
void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
    Compilation &C, const InputInfo &Output,
    const InputInfoList &Inputs, const ArgList &Args,
    const JobAction &JA) const {
  const ToolChain &TC = getToolChain();
  std::string Name =
      std::string(llvm::sys::path::stem(Output.getFilename()));

  // Create Temp Object File Generator,
  // Offload Bundled file and Bundled Object file.
  // Keep them if save-temps is enabled.
  const char *McinFile;
  const char *BundleFile;
  if (C.getDriver().isSaveTempsEnabled()) {
    McinFile = C.getArgs().MakeArgString(Name + ".mcin");
    BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
  } else {
    auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
    McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
    auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
    BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
  }
  constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);

  // Create a buffer to write the contents of the temp obj generator.
  std::string ObjBuffer;
  llvm::raw_string_ostream ObjStream(ObjBuffer);

  // Add MC directives to embed target binaries. The image is aligned with
  // `.p2align 3`, i.e. on an 8-byte boundary. Alignment is not mandatory, but
  // increases the likelihood of data to be aligned with a cache block in
  // several main host machines.
  // NOTE(review): earlier wording promised 16-byte alignment, which would be
  // `.p2align 4` -- confirm which is intended.
  // NOTE(review): `.data` follows `.section .hip_fatbin`, so the directives
  // after it apply to .data -- confirm __hip_fatbin is meant to live there
  // rather than in .hip_fatbin.
  ObjStream << "# HIP Object Generator\n";
  ObjStream << "# *** Automatically generated by Clang ***\n";
  ObjStream << " .type __hip_fatbin,@object\n";
  ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n";
  ObjStream << " .data\n";
  ObjStream << " .globl __hip_fatbin\n";
  ObjStream << " .p2align 3\n";
  ObjStream << "__hip_fatbin:\n";
  ObjStream << " .incbin \"" << BundleFile << "\"\n";
  ObjStream.flush();

  // Dump the contents of the temp object file gen if the user requested that.
  // We support this option to enable testing of behavior with -###.
  if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
    llvm::errs() << ObjBuffer;

  // If this is a dry run (-###), do not create the MC input file: nothing
  // will execute the queued commands, so the file would only be a stray side
  // effect. The llvm-mc command is still queued below so it gets printed.
  if (!C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
    // Open the MC input file and write the contents.
    std::error_code EC;
    llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);

    if (EC) {
      C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
      return;
    }

    Objf << ObjBuffer;
  }

  // Assemble the generated MC input into the requested output object.
  ArgStringList McArgs{"-triple", Args.MakeArgString(TC.getTripleString()),
                       "-o",      Output.getFilename(),
                       McinFile,  "--filetype=obj"};
  const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
  C.addCommand(std::make_unique<Command>(JA, *this, Mc, McArgs, Inputs));
}

// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
if (Inputs.size() > 0 &&
Inputs[0].getType() == types::TY_Image &&
JA.getType() == types::TY_Object)
return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);

if (JA.getType() == types::TY_HIP_FATBIN)
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/HIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args) const;

// Construct command for creating Object from HIP fatbin.
// Bundles Inputs into a HIP fat binary, writes an MC input file that embeds
// the bundle under the __hip_fatbin symbol, and queues an llvm-mc command
// that assembles that file into Output.
void constructGenerateObjFileFromHIPFatBinary(Compilation &C,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
const JobAction &JA) const;
};

} // end namespace AMDGCN
Expand Down
11 changes: 7 additions & 4 deletions clang/test/Driver/hip-binding.hip
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
// RUN: 2>&1 | FileCheck %s

// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"]
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]"
// CHECK-NOT: offload bundler
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]"
// CHECK-NOT: offload bundler
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]"
// CHECK-NOT: offload bundler
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out"

// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
Expand Down
Loading

0 comments on commit 77df5a8

Please sign in to comment.