diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index a48761af400fda..de732918fc763d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2325,8 +2325,11 @@ class OffloadingActionBuilder final { /// Append top level actions generated by the builder. virtual void appendTopLevelActions(ActionList &AL) {} - /// Append linker actions generated by the builder. - virtual void appendLinkActions(ActionList &AL) {} + /// Append linker device actions generated by the builder. + virtual void appendLinkDeviceActions(ActionList &AL) {} + + /// Append linker host action generated by the builder. + virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; } /// Append linker actions generated by the builder. virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -2796,17 +2799,45 @@ class OffloadingActionBuilder final { : ABRT_Success; } - void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { + void appendLinkDeviceActions(ActionList &AL) override { + if (DeviceLinkerInputs.size() == 0) + return; + + assert(DeviceLinkerInputs.size() == GpuArchList.size() && + "Linker inputs and GPU arch list sizes do not match."); + // Append a new link action for each device. unsigned I = 0; for (auto &LI : DeviceLinkerInputs) { + // Each entry in DeviceLinkerInputs corresponds to a GPU arch. auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + // Linking all inputs for the current GPU arch. + // LI contains all the inputs for the linker. + OffloadAction::DeviceDependences DeviceLinkDeps; + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], + CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + AL.push_back(C.MakeAction(DeviceLinkDeps, + DeviceLinkAction->getType())); ++I; } + DeviceLinkerInputs.clear(); + + // Create a host object from all the device images by embedding them + // in a fat binary. + OffloadAction::DeviceDependences DDeps; + auto *TopDeviceLinkAction = + C.MakeAction(AL, types::TY_Object); + DDeps.add(*TopDeviceLinkAction, *ToolChains[0], + nullptr, AssociatedOffloadKind); + + // Offload the host object to the host linker. + AL.push_back(C.MakeAction(DDeps, TopDeviceLinkAction->getType())); } + + Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } + + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} }; /// OpenMP action builder. The host bitcode is passed to the device frontend @@ -2934,7 +2965,7 @@ class OffloadingActionBuilder final { OpenMPDeviceActions.clear(); } - void appendLinkActions(ActionList &AL) override { + void appendLinkDeviceActions(ActionList &AL) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -2953,6 +2984,14 @@ class OffloadingActionBuilder final { DeviceLinkerInputs.clear(); } + Action* appendLinkHostActions(ActionList &AL) override { + // Create wrapper bitcode from the result of device link actions and compile + // it to an object which will be added to the host link command. + auto *BC = C.MakeAction(AL, types::TY_LLVM_BC); + auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); + return C.MakeAction(ASM, types::TY_Object); + } + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} bool initialize() override { @@ -3185,17 +3224,20 @@ class OffloadingActionBuilder final { for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; - SB->appendLinkActions(DeviceAL); + SB->appendLinkDeviceActions(DeviceAL); } if (DeviceAL.empty()) return nullptr; - // Create wrapper bitcode from the result of device link actions and compile - // it to an object which will be added to the host link command. - auto *BC = C.MakeAction(DeviceAL, types::TY_LLVM_BC); - auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); - return C.MakeAction(ASM, types::TY_Object); + // Let builders add host linking actions. + Action* HA; + for (DeviceActionBuilder *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + HA = SB->appendLinkHostActions(DeviceAL); + } + return HA; } /// Processes the host linker action. This currently consists of replacing it diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 7fdb3fdf000948..542fb67db7ef16 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -152,14 +152,12 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); for (const auto &II : Inputs) { - // If the current tool chain refers to an OpenMP or HIP offloading host, we - // should ignore inputs that refer to OpenMP or HIP offloading devices - + // If the current tool chain refers to an OpenMP offloading host, we + // should ignore inputs that refer to OpenMP offloading devices - // they will be embedded according to a proper linker script. if (auto *IA = II.getAction()) if ((JA.isHostOffloading(Action::OFK_OpenMP) && - IA->isDeviceOffloading(Action::OFK_OpenMP)) || - (JA.isHostOffloading(Action::OFK_HIP) && - IA->isDeviceOffloading(Action::OFK_HIP))) + IA->isDeviceOffloading(Action::OFK_OpenMP))) continue; if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) @@ -1298,115 +1296,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D, } } -/// Add HIP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding the device images into the host. The -/// linker script also defines a symbol required by the code generation so that -/// the image can be retrieved at runtime. This should be used only in tool -/// chains that support linker scripts. -void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, const ArgList &Args, - ArgStringList &CmdArgs, const JobAction &JA, - const Tool &T) { - - // If this is not a HIP host toolchain, we don't need to do anything. - if (!JA.isHostOffloading(Action::OFK_HIP)) - return; - - InputInfoList DeviceInputs; - for (const auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? - if (A && isa(A) && A->isDeviceOffloading(Action::OFK_HIP)) { - DeviceInputs.push_back(II); - } - } - - if (DeviceInputs.empty()) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. - const char *LKS; - std::string Name = - std::string(llvm::sys::path::filename(Output.getFilename())); - if (C.getDriver().isSaveTempsEnabled()) { - LKS = C.getArgs().MakeArgString(Name + ".lk"); - } else { - auto TmpName = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the HIP offload tool chain. - auto *HIPTC = static_cast( - C.getSingleOffloadToolChain()); - assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn && - "Wrong platform"); - (void)HIPTC; - - const char *BundleFile; - if (C.getDriver().isSaveTempsEnabled()) { - BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); - } else { - auto TmpName = C.getDriver().GetTemporaryPath(Name, "hipfb"); - BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T); - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. - LksStream << "/*\n"; - LksStream << " HIP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - LksStream << "INPUT(" << BundleFile << ")\n"; - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - LksStream << " .hip_fatbin :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n"; - LksStream << " " << BundleFile << "\n"; - LksStream << " }\n"; - LksStream << " /DISCARD/ :\n"; - LksStream << " {\n"; - LksStream << " * ( __CLANG_OFFLOAD_BUNDLE__* )\n"; - LksStream << " }\n"; - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. - std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args, const InputInfo &Output, const InputInfo &Input, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 58bc92c9b7569a..c7d695ebf415f0 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -45,12 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D, llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); -void AddHIPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, const JobAction &JA, - const Tool &T); - const char *SplitDebugName(const llvm::opt::ArgList &Args, const InputInfo &Input, const InputInfo &Output); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 93a285d42a08e6..619407a7c57bf9 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -625,10 +625,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - // Add HIP offloading linker script args if required. - AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA, - *this); - Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index b9143f98f15269..f524951ce8375b 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -104,6 +104,76 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique(JA, T, Bundler, BundlerArgs, Inputs)); } +/// Add Generated HIP Object File which has device images embedded into the +/// host to the argument list for linking. Using MC directives, embed the +/// device code and also define symbols required by the code generation so that +/// the image can be retrieved at runtime. +void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( + Compilation &C, const InputInfo &Output, + const InputInfoList &Inputs, const ArgList &Args, + const JobAction &JA) const { + const ToolChain &TC = getToolChain(); + std::string Name = + std::string(llvm::sys::path::stem(Output.getFilename())); + + // Create Temp Object File Generator, + // Offload Bundled file and Bundled Object file. + // Keep them if save-temps is enabled. + const char *McinFile; + const char *BundleFile; + if (C.getDriver().isSaveTempsEnabled()) { + McinFile = C.getArgs().MakeArgString(Name + ".mcin"); + BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); + } else { + auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin"); + McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin)); + auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb"); + BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb)); + } + constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this); + + // Create a buffer to write the contents of the temp obj generator. + std::string ObjBuffer; + llvm::raw_string_ostream ObjStream(ObjBuffer); + + // Add MC directives to embed target binaries. We ensure that each + // section and image is 16-byte aligned. This is not mandatory, but + // increases the likelihood of data to be aligned with a cache block + // in several main host machines. + ObjStream << "# HIP Object Generator\n"; + ObjStream << "# *** Automatically generated by Clang ***\n"; + ObjStream << " .type __hip_fatbin,@object\n"; + ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n"; + ObjStream << " .data\n"; + ObjStream << " .globl __hip_fatbin\n"; + ObjStream << " .p2align 3\n"; + ObjStream << "__hip_fatbin:\n"; + ObjStream << " .incbin \"" << BundleFile << "\"\n"; + ObjStream.flush(); + + // Dump the contents of the temp object file gen if the user requested that. + // We support this option to enable testing of behavior with -###. + if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) + llvm::errs() << ObjBuffer; + + // Open script file and write the contents. + std::error_code EC; + llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None); + + if (EC) { + C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); + return; + } + + Objf << ObjBuffer; + + ArgStringList McArgs{"-triple", Args.MakeArgString(TC.getTripleString()), + "-o", Output.getFilename(), + McinFile, "--filetype=obj"}; + const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); + C.addCommand(std::make_unique(JA, *this, Mc, McArgs, Inputs)); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // object file. It calls llvm-link, opt, llc, then lld steps. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -111,6 +181,10 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + if (Inputs.size() > 0 && + Inputs[0].getType() == types::TY_Image && + JA.getType() == types::TY_Object) + return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA); if (JA.getType() == types::TY_HIP_FATBIN) return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h index fa6984ed9f52ff..5e2be7138579a4 100644 --- a/clang/lib/Driver/ToolChains/HIP.h +++ b/clang/lib/Driver/ToolChains/HIP.h @@ -42,6 +42,13 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool { void constructLldCommand(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const InputInfo &Output, const llvm::opt::ArgList &Args) const; + + // Construct command for creating Object from HIP fatbin. + void constructGenerateObjFileFromHIPFatBinary(Compilation &C, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + const JobAction &JA) const; }; } // end namespace AMDGCN diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip index 53fe9af0e653c9..3d839302234629 100644 --- a/clang/test/Driver/hip-binding.hip +++ b/clang/test/Driver/hip-binding.hip @@ -25,12 +25,15 @@ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ // RUN: 2>&1 | FileCheck %s -// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]" +// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"] +// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"] +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]" +// CHECK-NOT: offload bundler +// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out" // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\ diff --git a/clang/test/Driver/hip-link-save-temps.hip b/clang/test/Driver/hip-link-save-temps.hip index 304be921ebc628..62d8d60af3c099 100644 --- a/clang/test/Driver/hip-link-save-temps.hip +++ b/clang/test/Driver/hip-link-save-temps.hip @@ -27,7 +27,7 @@ // CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o" // CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o" -// OUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" -// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk" -// NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" -// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "-T" "a.out.lk" +// CHECK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o" +// CHECK-SAME: "[[OBJBUNDLE:.*.o]]" "{{.*}}.mcin" "--filetype=obj" +// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "[[OBJBUNDLE]]" +// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "[[OBJBUNDLE]]" diff --git a/clang/test/Driver/hip-link-shared-library.hip b/clang/test/Driver/hip-link-shared-library.hip index cb409d1a874d82..895bd53225e12e 100644 --- a/clang/test/Driver/hip-link-shared-library.hip +++ b/clang/test/Driver/hip-link-shared-library.hip @@ -3,10 +3,13 @@ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \ // RUN: -fgpu-rdc 2>&1 | FileCheck %s -// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]" +// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"] +// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"] +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]" // CHECK-NOT: offload bundler -// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "{{.*}}/Inputs/in.so", "[[IMG2]]", "[[IMG3]]"], output: "a.out" +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]" +// CHECK-NOT: offload bundler +// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "{{.*}}/Inputs/in.so", "[[FATBINOBJ]]"], output: "a.out" diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index b27ae44167a21a..7c2dc1384ccd41 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -21,25 +21,27 @@ // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) // NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) // NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) -// NRD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image +// RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]]) -// RDC-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) -// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH]]) +// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]]) -// NRD-DAG: [[P12:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir +// NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir +// RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object +// NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) +// NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) +// NRD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]]) -// NRD-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) -// RDC-DAG: [[P13:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) -// BIN-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (host-[[T]]) -// BIN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (host-[[T]]) -// RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P15]]}, "device-[[T]] (amdgcn-amd-amdhsa:gfx803)" {[[P10]]}, image // // Test single gpu architecture up to the assemble phase. // @@ -56,59 +58,84 @@ // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]]) // -// Test two gpu architectures with complete compilation. +// Test two gpu architectures with complete compilation with -fno-gpu-rdc. // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,NRD2,CL2 %s +// RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,NRD2 %s +// RUN: | FileCheck -check-prefixes=NRD2 %s +// NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) + +// NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) +// NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image + +// NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]]) +// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir +// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) +// NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) + +// +// Test two gpu architectures with complete compilation with -fgpu-rdc. +// // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,RDC2,CL2,RCL2 %s +// RUN: | FileCheck -check-prefixes=RDC2,CL2,RCL2 %s // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,RDC2,RC2 %s +// RUN: | FileCheck -check-prefixes=RDC2,RC2 %s -// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) -// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) -// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) -// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]]) +// RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) // RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image +// RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image // RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir -// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) // RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image // RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir -// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]]) +// RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir -// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) -// RDC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) -// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// CL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) -// RCL2-DAG: [[P22:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P21]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image -// RC2-DAG: [[P22:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]]) +// RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]]) +// RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object +// RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]]) +// RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]]) // // Test two gpu architecturess up to the assemble phase. @@ -253,8 +280,13 @@ // RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T]]) // L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T]]) // RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T]]) -// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]]) -// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (host-[[T]]) -// RL2-DAG: [[P5:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]]) + +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]]) +// RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]]) -// RL2-DAG: [[P7:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P4]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P5]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image +// RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image +// RL2-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]]) +// RL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object + +// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]]) +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]]) diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip index 1ac506aa6fba1c..f326f4155a1506 100644 --- a/clang/test/Driver/hip-save-temps.hip +++ b/clang/test/Driver/hip-save-temps.hip @@ -25,13 +25,20 @@ // -fgpu-rdc without -o // RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ // RUN: -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-NOUT,NOUT %s +// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s // -fgpu-rdc with -o -// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ -// RUN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-WOUT,WOUT %s +// UN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ +// UN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ +// UN: FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s + +// -fgpu-rdc host object path +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// RDCL: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o" +// device object paths // CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" // NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc" @@ -43,22 +50,26 @@ // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" // NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" -// CHECK-NOT: llvm-link -// CHECK-NOT: opt -// CHECK-NOT: llc +// CHECK-NOT: "{{.*}}llvm-link" +// CHECK-NOT: "{{.*}}opt" +// CHECK-NOT: "{{.*}}llc" // NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" - // RDCL: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // RDCL-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// RDCC: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o" +// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o" +// RDCL: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps-hip-amdgcn-amd-amdhsa.hipfb" +// RDCL: {{.*}}llvm-mc{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa.o" "hip-save-temps-hip-amdgcn-amd-amdhsa.mcin" "--filetype=obj" -// NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" -// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// -fno-gpu-rdc host object path +// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" // NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// RDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" -// CHECK: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o" -// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o" -// RDC-NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" -// RDC-WOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" -// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" -// WOUT: "{{.*ld.*}}" {{.*}} "-o" "executable" \ No newline at end of file +// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// NORDC: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o" + +// output to default a.out or -o specified file name +// NOUT: {{.*}}ld{{.*}}"-o" "a.out" +// WOUT: {{.*}}ld{{.*}}"-o" "executable" diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 25661f399a82f9..25fd44f2529b4c 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -86,12 +86,22 @@ // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" // LINK: "-unbundle" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],[[B_BC1:.*o]],[[B_BC2:.*o]]" +// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" +// LINK: "-unbundle" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK: "-unbundle" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" // LINK: "-unbundle" // LINK-NOT: "*.llvm-link" @@ -110,5 +120,8 @@ // LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" -// LINK: [[LD:".*ld.*"]] {{.*}} "[[A_OBJ_HOST]]" "[[B_OBJ_HOST]]" -// LINK-SAME: {{.*}} "-T" "{{.*}}.lk" +// LINK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o" +// LINK-SAME: "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj" + +// LINK: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]" +// LINK-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index f520236abcb9f7..7148478b428591 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -12,7 +12,23 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// emit objects for host side path +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" +// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip" +// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] + +// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" +// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip" +// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] + +// generate image for device side path on gfx803 +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -21,7 +37,7 @@ // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] +// CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" @@ -32,7 +48,7 @@ // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] +// CHECK-SAME: {{.*}} [[B_SRC]] // CHECK-NOT: "*.llvm-link" // CHECK-NOT: ".*opt" @@ -40,6 +56,7 @@ // CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] +// generate image for device side path on gfx900 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" @@ -66,23 +83,13 @@ // CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] -// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[A_SRC]] - -// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip" -// CHECK-SAME: {{.*}} [[B_SRC]] - +// combine images generated into hip fat binary object // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" -// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] -// CHECK-SAME: {{.*}} "-T" "{{.*}}.lk" +// CHECK: [[MC:".*llvm-mc"]] "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj" + +// output the executable +// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]