From 77df5a8283edbfc33ad3b12df3bd42d54d7ba4f4 Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Thu, 11 Jun 2020 18:36:10 +0000 Subject: [PATCH] [HIP] Move HIP Linking Logic into HIP ToolChain This patch is a follow up on https://reviews.llvm.org/D78759. Extract the HIP Linker script from generic GNU linker, and move it into HIP ToolChain. Update OffloadActionBuilder Link actions feature to apply device linking and host linking actions separately. Using MC Directives, embed the device images and define symbols. Reviewers: JonChesterfield, yaxunl Subscribers: tra, echristo, jdoerfert, msearles, scchan Differential Revision: https://reviews.llvm.org/D81963 --- clang/lib/Driver/Driver.cpp | 66 ++++++++-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 117 +---------------- clang/lib/Driver/ToolChains/CommonArgs.h | 6 - clang/lib/Driver/ToolChains/Gnu.cpp | 4 - clang/lib/Driver/ToolChains/HIP.cpp | 74 +++++++++++ clang/lib/Driver/ToolChains/HIP.h | 7 + clang/test/Driver/hip-binding.hip | 11 +- clang/test/Driver/hip-link-save-temps.hip | 8 +- clang/test/Driver/hip-link-shared-library.hip | 11 +- clang/test/Driver/hip-phases.hip | 120 +++++++++++------- clang/test/Driver/hip-save-temps.hip | 47 ++++--- .../Driver/hip-toolchain-rdc-separate.hip | 21 ++- clang/test/Driver/hip-toolchain-rdc.hip | 45 ++++--- 13 files changed, 304 insertions(+), 233 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index a48761af400fda..de732918fc763d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2325,8 +2325,11 @@ class OffloadingActionBuilder final { /// Append top level actions generated by the builder. virtual void appendTopLevelActions(ActionList &AL) {} - /// Append linker actions generated by the builder. - virtual void appendLinkActions(ActionList &AL) {} + /// Append linker device actions generated by the builder. 
+ virtual void appendLinkDeviceActions(ActionList &AL) {} + + /// Append linker host action generated by the builder. + virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; } /// Append linker actions generated by the builder. virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -2796,17 +2799,45 @@ class OffloadingActionBuilder final { : ABRT_Success; } - void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { + void appendLinkDeviceActions(ActionList &AL) override { + if (DeviceLinkerInputs.size() == 0) + return; + + assert(DeviceLinkerInputs.size() == GpuArchList.size() && + "Linker inputs and GPU arch list sizes do not match."); + // Append a new link action for each device. unsigned I = 0; for (auto &LI : DeviceLinkerInputs) { + // Each entry in DeviceLinkerInputs corresponds to a GPU arch. auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + // Linking all inputs for the current GPU arch. + // LI contains all the inputs for the linker. + OffloadAction::DeviceDependences DeviceLinkDeps; + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], + CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + AL.push_back(C.MakeAction(DeviceLinkDeps, + DeviceLinkAction->getType())); ++I; } + DeviceLinkerInputs.clear(); + + // Create a host object from all the device images by embedding them + // in a fat binary. + OffloadAction::DeviceDependences DDeps; + auto *TopDeviceLinkAction = + C.MakeAction(AL, types::TY_Object); + DDeps.add(*TopDeviceLinkAction, *ToolChains[0], + nullptr, AssociatedOffloadKind); + + // Offload the host object to the host linker. 
+ AL.push_back(C.MakeAction(DDeps, TopDeviceLinkAction->getType())); } + + Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } + + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} }; /// OpenMP action builder. The host bitcode is passed to the device frontend @@ -2934,7 +2965,7 @@ class OffloadingActionBuilder final { OpenMPDeviceActions.clear(); } - void appendLinkActions(ActionList &AL) override { + void appendLinkDeviceActions(ActionList &AL) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -2953,6 +2984,14 @@ class OffloadingActionBuilder final { DeviceLinkerInputs.clear(); } + Action* appendLinkHostActions(ActionList &AL) override { + // Create wrapper bitcode from the result of device link actions and compile + // it to an object which will be added to the host link command. + auto *BC = C.MakeAction(AL, types::TY_LLVM_BC); + auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); + return C.MakeAction(ASM, types::TY_Object); + } + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} bool initialize() override { @@ -3185,17 +3224,20 @@ class OffloadingActionBuilder final { for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; - SB->appendLinkActions(DeviceAL); + SB->appendLinkDeviceActions(DeviceAL); } if (DeviceAL.empty()) return nullptr; - // Create wrapper bitcode from the result of device link actions and compile - // it to an object which will be added to the host link command. - auto *BC = C.MakeAction(DeviceAL, types::TY_LLVM_BC); - auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); - return C.MakeAction(ASM, types::TY_Object); + // Let builders add host linking actions. + Action* HA; + for (DeviceActionBuilder *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + HA = SB->appendLinkHostActions(DeviceAL); + } + return HA; } /// Processes the host linker action. 
This currently consists of replacing it diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 7fdb3fdf000948..542fb67db7ef16 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -152,14 +152,12 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); for (const auto &II : Inputs) { - // If the current tool chain refers to an OpenMP or HIP offloading host, we - // should ignore inputs that refer to OpenMP or HIP offloading devices - + // If the current tool chain refers to an OpenMP offloading host, we + // should ignore inputs that refer to OpenMP offloading devices - // they will be embedded according to a proper linker script. if (auto *IA = II.getAction()) if ((JA.isHostOffloading(Action::OFK_OpenMP) && - IA->isDeviceOffloading(Action::OFK_OpenMP)) || - (JA.isHostOffloading(Action::OFK_HIP) && - IA->isDeviceOffloading(Action::OFK_HIP))) + IA->isDeviceOffloading(Action::OFK_OpenMP))) continue; if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) @@ -1298,115 +1296,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D, } } -/// Add HIP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding the device images into the host. The -/// linker script also defines a symbol required by the code generation so that -/// the image can be retrieved at runtime. This should be used only in tool -/// chains that support linker scripts. -void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, const ArgList &Args, - ArgStringList &CmdArgs, const JobAction &JA, - const Tool &T) { - - // If this is not a HIP host toolchain, we don't need to do anything. 
- if (!JA.isHostOffloading(Action::OFK_HIP)) - return; - - InputInfoList DeviceInputs; - for (const auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? - if (A && isa(A) && A->isDeviceOffloading(Action::OFK_HIP)) { - DeviceInputs.push_back(II); - } - } - - if (DeviceInputs.empty()) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. - const char *LKS; - std::string Name = - std::string(llvm::sys::path::filename(Output.getFilename())); - if (C.getDriver().isSaveTempsEnabled()) { - LKS = C.getArgs().MakeArgString(Name + ".lk"); - } else { - auto TmpName = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the HIP offload tool chain. - auto *HIPTC = static_cast( - C.getSingleOffloadToolChain()); - assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn && - "Wrong platform"); - (void)HIPTC; - - const char *BundleFile; - if (C.getDriver().isSaveTempsEnabled()) { - BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); - } else { - auto TmpName = C.getDriver().GetTemporaryPath(Name, "hipfb"); - BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T); - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. 
- LksStream << "/*\n"; - LksStream << " HIP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - LksStream << "INPUT(" << BundleFile << ")\n"; - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - LksStream << " .hip_fatbin :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n"; - LksStream << " " << BundleFile << "\n"; - LksStream << " }\n"; - LksStream << " /DISCARD/ :\n"; - LksStream << " {\n"; - LksStream << " * ( __CLANG_OFFLOAD_BUNDLE__* )\n"; - LksStream << " }\n"; - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. 
- std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args, const InputInfo &Output, const InputInfo &Input, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 58bc92c9b7569a..c7d695ebf415f0 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -45,12 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D, llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); -void AddHIPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, const JobAction &JA, - const Tool &T); - const char *SplitDebugName(const llvm::opt::ArgList &Args, const InputInfo &Input, const InputInfo &Output); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 93a285d42a08e6..619407a7c57bf9 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -625,10 +625,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - // Add HIP offloading linker script args if required. 
- AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA, - *this); - Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index b9143f98f15269..f524951ce8375b 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -104,6 +104,76 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique(JA, T, Bundler, BundlerArgs, Inputs)); } +/// Add Generated HIP Object File which has device images embedded into the +/// host to the argument list for linking. Using MC directives, embed the +/// device code and also define symbols required by the code generation so that +/// the image can be retrieved at runtime. +void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( + Compilation &C, const InputInfo &Output, + const InputInfoList &Inputs, const ArgList &Args, + const JobAction &JA) const { + const ToolChain &TC = getToolChain(); + std::string Name = + std::string(llvm::sys::path::stem(Output.getFilename())); + + // Create Temp Object File Generator, + // Offload Bundled file and Bundled Object file. + // Keep them if save-temps is enabled. + const char *McinFile; + const char *BundleFile; + if (C.getDriver().isSaveTempsEnabled()) { + McinFile = C.getArgs().MakeArgString(Name + ".mcin"); + BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); + } else { + auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin"); + McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin)); + auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb"); + BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb)); + } + constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this); + + // Create a buffer to write the contents of the temp obj generator. 
+ std::string ObjBuffer; + llvm::raw_string_ostream ObjStream(ObjBuffer); + + // Add MC directives to embed target binaries. The image is aligned to + // 8 bytes (.p2align 3) -- TODO confirm 16-byte alignment was not intended. + // Alignment is not mandatory, but increases the likelihood that the data + // is aligned with a cache block on several common host machines. + ObjStream << "# HIP Object Generator\n"; + ObjStream << "# *** Automatically generated by Clang ***\n"; + ObjStream << " .type __hip_fatbin,@object\n"; + ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n"; + ObjStream << " .data\n"; + ObjStream << " .globl __hip_fatbin\n"; + ObjStream << " .p2align 3\n"; + ObjStream << "__hip_fatbin:\n"; + ObjStream << " .incbin \"" << BundleFile << "\"\n"; + ObjStream.flush(); + + // Dump the generated MC input if the user requested that. + // We support this option to enable testing of behavior with -###. + if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) + llvm::errs() << ObjBuffer; + + // Open the generated .mcin file and write the contents. + std::error_code EC; + llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None); + + if (EC) { + C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); + return; + } + + Objf << ObjBuffer; + + ArgStringList McArgs{"-triple", Args.MakeArgString(TC.getTripleString()), + "-o", Output.getFilename(), + McinFile, "--filetype=obj"}; + const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); + C.addCommand(std::make_unique(JA, *this, Mc, McArgs, Inputs)); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // object file. It calls llvm-link, opt, llc, then lld steps. 
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -111,6 +181,10 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + if (Inputs.size() > 0 && + Inputs[0].getType() == types::TY_Image && + JA.getType() == types::TY_Object) + return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA); if (JA.getType() == types::TY_HIP_FATBIN) return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h index fa6984ed9f52ff..5e2be7138579a4 100644 --- a/clang/lib/Driver/ToolChains/HIP.h +++ b/clang/lib/Driver/ToolChains/HIP.h @@ -42,6 +42,13 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool { void constructLldCommand(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const InputInfo &Output, const llvm::opt::ArgList &Args) const; + + // Construct command for creating Object from HIP fatbin. 
+ void constructGenerateObjFileFromHIPFatBinary(Compilation &C, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + const JobAction &JA) const; }; } // end namespace AMDGCN diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip index 53fe9af0e653c9..3d839302234629 100644 --- a/clang/test/Driver/hip-binding.hip +++ b/clang/test/Driver/hip-binding.hip @@ -25,12 +25,15 @@ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ // RUN: 2>&1 | FileCheck %s -// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]" +// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"] +// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"] +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]" +// CHECK-NOT: offload bundler +// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out" // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\ diff --git a/clang/test/Driver/hip-link-save-temps.hip 
b/clang/test/Driver/hip-link-save-temps.hip index 304be921ebc628..62d8d60af3c099 100644 --- a/clang/test/Driver/hip-link-save-temps.hip +++ b/clang/test/Driver/hip-link-save-temps.hip @@ -27,7 +27,7 @@ // CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o" // CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o" -// OUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" -// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk" -// NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" -// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "-T" "a.out.lk" +// CHECK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o" +// CHECK-SAME: "[[OBJBUNDLE:.*.o]]" "{{.*}}.mcin" "--filetype=obj" +// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "[[OBJBUNDLE]]" +// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "[[OBJBUNDLE]]" diff --git a/clang/test/Driver/hip-link-shared-library.hip b/clang/test/Driver/hip-link-shared-library.hip index cb409d1a874d82..895bd53225e12e 100644 --- a/clang/test/Driver/hip-link-shared-library.hip +++ b/clang/test/Driver/hip-link-shared-library.hip @@ -3,10 +3,13 @@ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \ // RUN: -fgpu-rdc 2>&1 | FileCheck %s -// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]" +// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"] +// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"] +// CHECK: # 
"amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "{{.*}}/Inputs/in.so", "[[IMG2]]", "[[IMG3]]"], output: "a.out" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]" +// CHECK-NOT: offload bundler +// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "{{.*}}/Inputs/in.so", "[[FATBINOBJ]]"], output: "a.out" diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index b27ae44167a21a..7c2dc1384ccd41 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -21,25 +21,27 @@ // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) // NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) // NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image +// RDC-DAG: 
[[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]]) -// RDC-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) -// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH]]) +// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]]) -// NRD-DAG: [[P12:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir +// NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir +// RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object +// NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) +// NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) +// NRD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]]) -// NRD-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) -// RDC-DAG: [[P13:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) -// BIN-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (host-[[T]]) -// BIN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (host-[[T]]) -// RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P15]]}, "device-[[T]] (amdgcn-amd-amdhsa:gfx803)" {[[P10]]}, image // // Test single gpu architecture up to the assemble phase. // @@ -56,59 +58,84 @@ // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]]) // -// Test two gpu architectures with complete compilation. +// Test two gpu architectures with complete compilation with -fno-gpu-rdc. 
// // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,NRD2,CL2 %s +// RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,NRD2 %s +// RUN: | FileCheck -check-prefixes=NRD2 %s +// NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) + +// NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) +// NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image + +// NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// NRD2-DAG: 
[[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]]) +// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir +// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) +// NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) + +// +// Test two gpu architectures with complete compilation with -fgpu-rdc. +// // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,RDC2,CL2,RCL2 %s +// RUN: | FileCheck -check-prefixes=RDC2,CL2,RCL2 %s // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,RDC2,RC2 %s +// RUN: | FileCheck -check-prefixes=RDC2,RC2 %s -// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) -// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) -// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) -// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) 
+// RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]]) +// RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) // RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image +// RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image // RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir -// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) // RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// RCL2-DAG: [[P16:[0-9]+]]: 
offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image // RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir -// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]]) +// RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir -// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) -// RDC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) -// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// CL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) -// RCL2-DAG: [[P22:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P21]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image -// RC2-DAG: [[P22:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]]) +// RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]]) +// RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object +// RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]]) +// RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]]) // // Test two gpu architecturess up to the assemble phase. 
@@ -253,8 +280,13 @@ // RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T]]) // L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T]]) // RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T]]) -// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]]) -// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (host-[[T]]) -// RL2-DAG: [[P5:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]]) + +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]]) +// RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]]) -// RL2-DAG: [[P7:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P4]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P5]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image +// RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image +// RL2-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]]) +// RL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object + +// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]]) +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]]) diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip index 1ac506aa6fba1c..f326f4155a1506 100644 --- a/clang/test/Driver/hip-save-temps.hip +++ b/clang/test/Driver/hip-save-temps.hip @@ -25,13 +25,20 @@ // -fgpu-rdc without -o // RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ // RUN: -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-NOUT,NOUT %s +// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s // -fgpu-rdc with -o -// RUN: %clang -### -target x86_64-linux-gnu -nogpulib 
-save-temps \ -// RUN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-WOUT,WOUT %s +// UN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ +// UN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ +// UN: FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s + +// -fgpu-rdc host object path +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// RDCL: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o" +// device object paths // CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" // NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc" @@ -43,22 +50,26 @@ // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" -// CHECK-NOT: llvm-link -// CHECK-NOT: opt -// CHECK-NOT: llc +// CHECK-NOT: "{{.*}}llvm-link" +// CHECK-NOT: "{{.*}}opt" +// CHECK-NOT: "{{.*}}llc" // NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" - // RDCL: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // RDCL-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" 
"hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// RDCC: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o" +// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o" +// RDCL: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps-hip-amdgcn-amd-amdhsa.hipfb" +// RDCL: {{.*}}llvm-mc{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa.o" "hip-save-temps-hip-amdgcn-amd-amdhsa.mcin" "--filetype=obj" -// NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" -// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// -fno-gpu-rdc host object path +// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" // NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// RDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" -// CHECK: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o" -// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o" -// RDC-NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" -// RDC-WOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" -// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" -// WOUT: "{{.*ld.*}}" {{.*}} "-o" "executable" \ No newline at end of file +// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// NORDC: "{{.*clang.*}}" "-cc1as" {{.*}} 
"-o" "hip-save-temps{{.*}}.o" + +// output to default a.out or -o specified file name +// NOUT: {{.*}}ld{{.*}}"-o" "a.out" +// WOUT: {{.*}}ld{{.*}}"-o" "executable" diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 25661f399a82f9..25fd44f2529b4c 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -86,12 +86,22 @@ // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" // LINK: "-unbundle" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],[[B_BC1:.*o]],[[B_BC2:.*o]]" +// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" +// LINK: "-unbundle" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK: "-unbundle" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" // LINK: "-unbundle" // LINK-NOT: "*.llvm-link" @@ -110,5 +120,8 @@ // LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" -// LINK: [[LD:".*ld.*"]] 
{{.*}} "[[A_OBJ_HOST]]" "[[B_OBJ_HOST]]" -// LINK-SAME: {{.*}} "-T" "{{.*}}.lk" +// LINK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o" +// LINK-SAME: "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj" + +// LINK: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]" +// LINK-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index f520236abcb9f7..7148478b428591 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -12,7 +12,23 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// emit objects for host side path +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" +// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip" +// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] + +// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" +// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip" +// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] + +// generate image for device side path on gfx803 +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -21,7 +37,7 @@ // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] +// CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" @@ -32,7 +48,7 @@ // CHECK-SAME: "{{.*}}lib1.bc" 
"{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] +// CHECK-SAME: {{.*}} [[B_SRC]] // CHECK-NOT: "*.llvm-link" // CHECK-NOT: ".*opt" @@ -40,6 +56,7 @@ // CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] +// generate image for device side path on gfx900 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" @@ -66,23 +83,13 @@ // CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] -// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[A_SRC]] - -// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[B_SRC]] - +// combine images generated into hip fat binary object // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" -// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] -// CHECK-SAME: {{.*}} "-T" "{{.*}}.lk" +// CHECK: [[MC:".*llvm-mc"]] "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj" + +// output the executable +// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]