diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5dbfce01b2686..9f650e9821dda 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -956,6 +956,12 @@ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOpt HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; +def fno_bundle_offload_arch : Flag<["-"], "fno-bundle-offload-arch">, + HelpText<"Specify that the offload bundler should not identify a bundle with " + "specific arch. For example, the bundle for `nvptx64-nvidia-cuda-sm_80` " + "uses the bundle tag `nvptx64-nvidia-cuda` when used. " + "This allows .o files to contain .bc bundles that are unspecific " + "to a particular arch version.">; def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a " "device architecture followed by target ID features delimited by a colon. Each target ID feature " diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3d43dfb815ded..3abdc7c9ee425 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4473,6 +4473,9 @@ class OffloadingActionBuilder final { /// List of static archives to extract FPGA dependency info from ActionList FPGAArchiveInputs; + // SYCLInstallation is needed in order to link SYCLDeviceLibs + SYCLInstallationDetector SYCLInstallation; + /// List of GPU architectures to use in this compilation with NVPTX/AMDGCN /// targets. 
SmallVector, 8> GpuArchList; @@ -4513,7 +4516,8 @@ class OffloadingActionBuilder final { SYCLActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs, OffloadingActionBuilder &OAB) - : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB) {} + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB), + SYCLInstallation(C.getDriver()) {} void withBoundArchForToolChain(const ToolChain *TC, llvm::function_ref Op) { @@ -4892,10 +4896,8 @@ class OffloadingActionBuilder final { } } - const toolchains::SYCLToolChain *SYCLTC = - static_cast(TC); SmallVector, 4> LibLocCandidates; - SYCLTC->SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); + SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); StringRef LibSuffix = isMSVCEnv ? ".obj" : ".o"; using SYCLDeviceLibsList = SmallVector; @@ -4948,20 +4950,100 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibsUnbundleAction = C.MakeAction( SYCLDeviceLibsInputAction); - addDeviceDepences(SYCLDeviceLibsUnbundleAction); - DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction); + + // We are using BoundArch="" here since the NVPTX bundles in + // the devicelib .o files do not contain any arch information + SYCLDeviceLibsUnbundleAction->registerDependentActionInfo( + TC, /*BoundArch=*/"", Action::OFK_SYCL); + OffloadAction::DeviceDependences Dep; + Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/"", + Action::OFK_SYCL); + auto *SYCLDeviceLibsDependenciesAction = + C.MakeAction( + Dep, SYCLDeviceLibsUnbundleAction->getType()); + + DeviceLinkObjects.push_back(SYCLDeviceLibsDependenciesAction); if (!LibLocSelected) LibLocSelected = !LibLocSelected; } } } }; + addInputs(sycl_device_wrapper_libs); - if (isSpirvAOT) + if (isSpirvAOT || TC->getTriple().isNVPTX()) addInputs(sycl_device_fallback_libs); if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, options::OPT_fno_sycl_instrument_device_code, true)) addInputs(sycl_device_annotation_libs); + + // For NVPTX 
backend we need to also link libclc and CUDA libdevice + // at the same stage that we link all of the unbundled SYCL libdevice + // objects together. + if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) { + std::string LibSpirvFile; + if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) { + auto ProvidedPath = + Args.getLastArgValue(options::OPT_fsycl_libspirv_path_EQ).str(); + if (llvm::sys::fs::exists(ProvidedPath)) + LibSpirvFile = ProvidedPath; + } else { + SmallVector LibraryPaths; + + // Expected path w/out install. + SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); + LibraryPaths.emplace_back(WithoutInstallPath.c_str()); + + // Expected path w/ install. + SmallString<256> WithInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc")); + LibraryPaths.emplace_back(WithInstallPath.c_str()); + + // Select remangled libclc variant + std::string LibSpirvTargetName = + (TC->getAuxTriple()->isOSWindows()) + ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." + "bc" + : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." 
+ "bc"; + + for (StringRef LibraryPath : LibraryPaths) { + SmallString<128> LibSpirvTargetFile(LibraryPath); + llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); + if (llvm::sys::fs::exists(LibSpirvTargetFile) || + Args.hasArg(options::OPT__HASH_HASH_HASH)) { + LibSpirvFile = std::string(LibSpirvTargetFile.str()); + break; + } + } + } + + if (!LibSpirvFile.empty()) { + Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibSpirvFile)); + auto *SYCLLibClcInputAction = + C.MakeAction(*LibClcInputArg, types::TY_LLVM_BC); + DeviceLinkObjects.push_back(SYCLLibClcInputAction); + } + + const toolchains::CudaToolChain *CudaTC = + static_cast(TC); + for (auto LinkInputEnum : enumerate(DeviceLinkerInputs)) { + const char *BoundArch = + SYCLTargetInfoList[LinkInputEnum.index()].BoundArch; + std::string LibDeviceFile = + CudaTC->CudaInstallation.getLibDeviceFile(BoundArch); + if (!LibDeviceFile.empty()) { + Arg *CudaDeviceLibInputArg = + MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibDeviceFile)); + auto *SYCLDeviceLibInputAction = C.MakeAction( + *CudaDeviceLibInputArg, types::TY_LLVM_BC); + DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); + } + } + } return NumOfDeviceLibLinked != 0; } @@ -5111,11 +5193,12 @@ class OffloadingActionBuilder final { // When spv online link is supported by all backends, the fallback // device libraries are only needed when current toolchain is using // AOT compilation. 
- if (isSPIR) { + if (isSPIR || isNVPTX) { bool UseJitLink = + isSPIR && Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, options::OPT_fno_sycl_device_lib_jit_link, false); - bool UseAOTLink = isSpirvAOT || !UseJitLink; + bool UseAOTLink = isSPIR && (isSpirvAOT || !UseJitLink); SYCLDeviceLibLinked = addSYCLDeviceLibs( TC, FullLinkObjects, UseAOTLink, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3a1d372681246..125538ba87f45 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8730,7 +8730,8 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, Triples += CurTC->getTriple().normalize(); if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP || CurKind == Action::OFK_Cuda || CurKind == Action::OFK_SYCL) && - !StringRef(CurDep->getOffloadingArch()).empty()) { + !StringRef(CurDep->getOffloadingArch()).empty() && + !TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) { Triples += '-'; Triples += CurDep->getOffloadingArch(); } @@ -8910,7 +8911,8 @@ void OffloadBundler::ConstructJobMultipleOutputs( Dep.DependentOffloadKind == Action::OFK_OpenMP || Dep.DependentOffloadKind == Action::OFK_Cuda || Dep.DependentOffloadKind == Action::OFK_SYCL) && - !Dep.DependentBoundArch.empty()) { + !Dep.DependentBoundArch.empty() && + !TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) { Triples += '-'; Triples += Dep.DependentBoundArch; } diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 5ea943563b42d..c546c4ecf39cd 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -183,7 +183,9 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain { bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override; void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind, const llvm::opt::ArgList 
&Args) const override; - bool IsMathErrnoDefault() const override { return false; } + + // math-errno should be the default for SYCL but not other OFK using CUDA TC + bool IsMathErrnoDefault() const override { return OK == Action::OFK_SYCL; } void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index ff31cfb2bc5bd..bd124f184dc88 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -170,6 +170,13 @@ const char *SYCL::Linker::constructLLVMLinkCommand( LibPostfix = ".obj"; std::string FileName = this->getToolChain().getInputFilename(II); StringRef InputFilename = llvm::sys::path::filename(FileName); + if (this->getToolChain().getTriple().isNVPTX()) { + // Linking SYCL Device libs requires libclc as well as libdevice + if ((InputFilename.find("nvidiacl") != InputFilename.npos || + InputFilename.find("libdevice") != InputFilename.npos)) + return true; + LibPostfix = ".cubin"; + } StringRef LibSyclPrefix("libsycl-"); if (!InputFilename.startswith(LibSyclPrefix) || !InputFilename.endswith(LibPostfix) || (InputFilename.count('-') < 2)) @@ -620,7 +627,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : ToolChain(D, Triple, Args), HostTC(HostTC), SYCLInstallation(D) { + : ToolChain(D, Triple, Args), HostTC(HostTC) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. 
getProgramPaths().push_back(getDriver().Dir); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 8c9bcf56a20bd..7281b50865f69 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -172,9 +172,7 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args) const override; - const ToolChain &HostTC; - const SYCLInstallationDetector SYCLInstallation; protected: Tool *buildBackendCompiler() const override; diff --git a/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc b/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc b/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-cuda-tu-offload.cu b/clang/test/Driver/sycl-cuda-tu-offload.cu index 113112798e666..8340d71144cdb 100644 --- a/clang/test/Driver/sycl-cuda-tu-offload.cu +++ b/clang/test/Driver/sycl-cuda-tu-offload.cu @@ -1,4 +1,4 @@ -// RUN: %clangxx -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 -c %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 -c %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES // Test the correct placement of the offloading actions for compiling CUDA sources (*.cu) in SYCL. 
@@ -19,7 +19,7 @@ // DEFAULT-PHASES:|- 14: assembler, {13}, object, (host-cuda-sycl) // DEFAULT-PHASES:15: clang-offload-bundler, {3, 14}, object, (host-cuda-sycl) -// RUN: %clangxx -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES2 +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES2 // DEFAULT-PHASES2: +- 0: input, "{{.*}}", cuda, (host-cuda) // DEFAULT-PHASES2: +- 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) @@ -37,14 +37,71 @@ // DEFAULT-PHASES2: +- 13: assembler, {12}, object, (host-cuda-sycl) // DEFAULT-PHASES2: +- 14: offload, "host-cuda-sycl (x86_64-unknown-linux-gnu)" {13}, object // DEFAULT-PHASES2:+- 15: linker, {14}, image, (host-cuda-sycl) -// DEFAULT-PHASES2:| +- 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_80)" {5}, ir -// DEFAULT-PHASES2:| +- 17: linker, {16}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| +- 18: sycl-post-link, {17}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | +- 19: file-table-tform, {18}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | | +- 20: backend, {19}, assembler, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | | |- 21: assembler, {20}, object, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | |- 22: linker, {20, 21}, cuda-fatbin, (device-sycl, sm_80) -// DEFAULT-PHASES2:| |- 23: foreach, {19, 22}, cuda-fatbin, (device-sycl, sm_80) -// DEFAULT-PHASES2:| +- 24: file-table-tform, {18, 23}, tempfiletable, (device-sycl, sm_80) -// DEFAULT-PHASES2:|- 25: clang-offload-wrapper, {24}, object, (device-sycl, sm_80) -// DEFAULT-PHASES2:26: offload, "host-cuda-sycl 
(x86_64-unknown-linux-gnu)" {15}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {25}, image +// DEFAULT-PHASES2:| +- 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_80)" {5}, ir +// DEFAULT-PHASES2:| +- 17: linker, {16}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | +- 18: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 19: clang-offload-unbundler, {18}, object +// DEFAULT-PHASES2:| |- 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// DEFAULT-PHASES2:| | +- 21: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 22: clang-offload-unbundler, {21}, object +// DEFAULT-PHASES2:| |- 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// DEFAULT-PHASES2:| | +- 24: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 25: clang-offload-unbundler, {24}, object +// DEFAULT-PHASES2:| |- 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// DEFAULT-PHASES2:| | +- 27: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 28: clang-offload-unbundler, {27}, object +// DEFAULT-PHASES2:| |- 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// DEFAULT-PHASES2:| | +- 30: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 31: clang-offload-unbundler, {30}, object +// DEFAULT-PHASES2:| |- 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// DEFAULT-PHASES2:| | +- 33: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 34: clang-offload-unbundler, {33}, object +// DEFAULT-PHASES2:| |- 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// DEFAULT-PHASES2:| | +- 36: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 37: clang-offload-unbundler, {36}, object +// DEFAULT-PHASES2:| |- 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// DEFAULT-PHASES2:| | +- 39: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 40: clang-offload-unbundler, {39}, object +// DEFAULT-PHASES2:| |- 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// DEFAULT-PHASES2:| | +- 42: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 43: clang-offload-unbundler, {42}, object +// DEFAULT-PHASES2:| |- 44: 
offload, " (nvptx64-nvidia-cuda)" {43}, object +// DEFAULT-PHASES2:| | +- 45: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 46: clang-offload-unbundler, {45}, object +// DEFAULT-PHASES2:| |- 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// DEFAULT-PHASES2:| | +- 48: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 49: clang-offload-unbundler, {48}, object +// DEFAULT-PHASES2:| |- 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// DEFAULT-PHASES2:| | +- 51: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 52: clang-offload-unbundler, {51}, object +// DEFAULT-PHASES2:| |- 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// DEFAULT-PHASES2:| | +- 54: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 55: clang-offload-unbundler, {54}, object +// DEFAULT-PHASES2:| |- 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// DEFAULT-PHASES2:| | +- 57: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 58: clang-offload-unbundler, {57}, object +// DEFAULT-PHASES2:| |- 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// DEFAULT-PHASES2:| | +- 60: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 61: clang-offload-unbundler, {60}, object +// DEFAULT-PHASES2:| |- 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// DEFAULT-PHASES2:| | +- 63: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 64: clang-offload-unbundler, {63}, object +// DEFAULT-PHASES2:| |- 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// DEFAULT-PHASES2:| | +- 66: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 67: clang-offload-unbundler, {66}, object +// DEFAULT-PHASES2:| |- 68: offload, " (nvptx64-nvidia-cuda)" {67}, object +// DEFAULT-PHASES2:| | +- 69: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 70: clang-offload-unbundler, {69}, object +// DEFAULT-PHASES2:| |- 71: offload, " (nvptx64-nvidia-cuda)" {70}, object +// DEFAULT-PHASES2:| |- 72: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice{{.*}}", ir, 
(device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 74: linker, {17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 72, 73}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 75: sycl-post-link, {74}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | +- 76: file-table-tform, {75}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | | +- 77: backend, {76}, assembler, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | | |- 78: assembler, {77}, object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | |- 79: linker, {77, 78}, cuda-fatbin, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 80: foreach, {76, 79}, cuda-fatbin, (device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 81: file-table-tform, {75, 80}, tempfiletable, (device-sycl, sm_80) +// DEFAULT-PHASES2:|- 82: clang-offload-wrapper, {81}, object, (device-sycl, sm_80) +// DEFAULT-PHASES2:83: offload, "host-cuda-sycl (x86_64-unknown-linux-gnu)" {15}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {82}, image diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c index af1c40d1d632d..7689b83f89356 100644 --- a/clang/test/Driver/sycl-instrumentation.c +++ b/clang/test/Driver/sycl-instrumentation.c @@ -22,7 +22,6 @@ // RUN: %clangxx -fsycl -fno-sycl-instrument-device-code -fsycl-targets=spir64 -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-instrument-device-code -nocudalib -### %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED,CHECK-WARNING %s +// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // CHECK-NONPASSED-NOT: "-fsycl-instrument-device-code" // CHECK-NONPASSED-NOT: "-input={{.*}}libsycl-itt-{{.*}}.{{o|obj}}" -// CHECK-WARNING: warning: argument unused during compilation: '-fno-sycl-instrument-device-code' diff --git a/clang/test/Driver/sycl-no-bundle-offload-arch.cpp b/clang/test/Driver/sycl-no-bundle-offload-arch.cpp new file mode 100644 index 
0000000000000..6186a3cc8323f --- /dev/null +++ b/clang/test/Driver/sycl-no-bundle-offload-arch.cpp @@ -0,0 +1,13 @@ +// RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-bundle-offload-arch -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUNDLE-TAG-OBJ %s +// +// CHK-BUNDLE-TAG-OBJ-NOT: clang-offload-bundler{{.*}}-targets=sycl-nvptx64-nvidia-cuda-sm_" + +// RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-bundle-offload-arch %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUNDLE-TAG %s +// +// CHK-BUNDLE-TAG-NOT: clang-offload-bundler{{.*}}-targets=sycl-nvptx64-nvidia-cuda-sm_" + +void func(){}; diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index 455d0cd0c238d..36ee0168225bf 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -34,13 +34,14 @@ // CHK-ACTIONS-WIN: clang-offload-wrapper"{{.*}} "-host=x86_64-pc-windows-msvc" "-target=nvptx64" "-kind=sycl"{{.*}} /// Check phases w/out specifying a compute capability. 
-// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ +// RUN: -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl \ +// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \ // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s // -// RUN: %clang_cl -ccc-print-phases -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s +// TODO: Enable for clang_cl once device lib linking works for clang_cl // // CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES-NO-CC: 1: append-footer, {0}, c++, (host-sycl) @@ -54,26 +55,84 @@ // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 17: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {19}, image - +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-crt.o", object +// CHK-PHASES-NO-CC: 13: clang-offload-unbundler, {12}, object +// CHK-PHASES-NO-CC: 14: offload, " (nvptx64-nvidia-cuda)" {13}, object +// CHK-PHASES-NO-CC: 15: input, "{{.*}}libsycl-complex.o", 
object +// CHK-PHASES-NO-CC: 16: clang-offload-unbundler, {15}, object +// CHK-PHASES-NO-CC: 17: offload, " (nvptx64-nvidia-cuda)" {16}, object +// CHK-PHASES-NO-CC: 18: input, "{{.*}}libsycl-complex-fp64.o", object +// CHK-PHASES-NO-CC: 19: clang-offload-unbundler, {18}, object +// CHK-PHASES-NO-CC: 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// CHK-PHASES-NO-CC: 21: input, "{{.*}}libsycl-cmath.o", object +// CHK-PHASES-NO-CC: 22: clang-offload-unbundler, {21}, object +// CHK-PHASES-NO-CC: 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// CHK-PHASES-NO-CC: 24: input, "{{.*}}libsycl-cmath-fp64.o", object +// CHK-PHASES-NO-CC: 25: clang-offload-unbundler, {24}, object +// CHK-PHASES-NO-CC: 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// CHK-PHASES-NO-CC: 27: input, "{{.*}}libsycl-imf.o", object +// CHK-PHASES-NO-CC: 28: clang-offload-unbundler, {27}, object +// CHK-PHASES-NO-CC: 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// CHK-PHASES-NO-CC: 30: input, "{{.*}}libsycl-imf-fp64.o", object +// CHK-PHASES-NO-CC: 31: clang-offload-unbundler, {30}, object +// CHK-PHASES-NO-CC: 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// CHK-PHASES-NO-CC: 33: input, "{{.*}}libsycl-fallback-cassert.o", object +// CHK-PHASES-NO-CC: 34: clang-offload-unbundler, {33}, object +// CHK-PHASES-NO-CC: 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// CHK-PHASES-NO-CC: 36: input, "{{.*}}libsycl-fallback-cstring.o", object +// CHK-PHASES-NO-CC: 37: clang-offload-unbundler, {36}, object +// CHK-PHASES-NO-CC: 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// CHK-PHASES-NO-CC: 39: input, "{{.*}}libsycl-fallback-complex.o", object +// CHK-PHASES-NO-CC: 40: clang-offload-unbundler, {39}, object +// CHK-PHASES-NO-CC: 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// CHK-PHASES-NO-CC: 42: input, "{{.*}}libsycl-fallback-complex-fp64.o", object +// CHK-PHASES-NO-CC: 43: clang-offload-unbundler, {42}, object +// CHK-PHASES-NO-CC: 44: offload, " 
(nvptx64-nvidia-cuda)" {43}, object +// CHK-PHASES-NO-CC: 45: input, "{{.*}}libsycl-fallback-cmath.o", object +// CHK-PHASES-NO-CC: 46: clang-offload-unbundler, {45}, object +// CHK-PHASES-NO-CC: 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// CHK-PHASES-NO-CC: 48: input, "{{.*}}libsycl-fallback-cmath-fp64.o", object +// CHK-PHASES-NO-CC: 49: clang-offload-unbundler, {48}, object +// CHK-PHASES-NO-CC: 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// CHK-PHASES-NO-CC: 51: input, "{{.*}}libsycl-fallback-imf.o", object +// CHK-PHASES-NO-CC: 52: clang-offload-unbundler, {51}, object +// CHK-PHASES-NO-CC: 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// CHK-PHASES-NO-CC: 54: input, "{{.*}}libsycl-fallback-imf-fp64.o", object +// CHK-PHASES-NO-CC: 55: clang-offload-unbundler, {54}, object +// CHK-PHASES-NO-CC: 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// CHK-PHASES-NO-CC: 57: input, "{{.*}}libsycl-itt-user-wrappers.o", object +// CHK-PHASES-NO-CC: 58: clang-offload-unbundler, {57}, object +// CHK-PHASES-NO-CC: 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// CHK-PHASES-NO-CC: 60: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object +// CHK-PHASES-NO-CC: 61: clang-offload-unbundler, {60}, object +// CHK-PHASES-NO-CC: 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// CHK-PHASES-NO-CC: 63: input, "{{.*}}libsycl-itt-stubs.o", object +// CHK-PHASES-NO-CC: 64: clang-offload-unbundler, {63}, object +// CHK-PHASES-NO-CC: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// CHK-PHASES-NO-CC: 66: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 69: sycl-post-link, {68}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 70: file-table-tform, {69}, ir, (device-sycl, sm_50) +// 
CHK-PHASES-NO-CC: 71: backend, {70}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 72: assembler, {71}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 73: linker, {71, 72}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 74: foreach, {70, 73}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 75: file-table-tform, {69, 74}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 76: clang-offload-wrapper, {75}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 77: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {76}, image +// +// /// Check phases specifying a compute capability. -// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ +// RUN: -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl \ +// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \ // RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES %s // -// RUN: %clang_cl -ccc-print-phases -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-cuda \ -// RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PHASES %s +// TODO: Enable for clang_cl once device lib linking works for clang_cl // // CHK-PHASES: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES: 1: append-footer, {0}, c++, (host-sycl) @@ -87,15 +146,72 @@ // CHK-PHASES: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES: 10: linker, {9}, image, (host-sycl) // CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35) -// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35) -// CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35) -// CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 15: assembler, {14}, object, (device-sycl, sm_35) -// 
CHK-PHASES: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 17: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_35) -// CHK-PHASES: 20: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {19}, image +// CHK-PHASES: 12: input, "{{.*}}libsycl-crt.o", object +// CHK-PHASES: 13: clang-offload-unbundler, {12}, object +// CHK-PHASES: 14: offload, " (nvptx64-nvidia-cuda)" {13}, object +// CHK-PHASES: 15: input, "{{.*}}libsycl-complex.o", object +// CHK-PHASES: 16: clang-offload-unbundler, {15}, object +// CHK-PHASES: 17: offload, " (nvptx64-nvidia-cuda)" {16}, object +// CHK-PHASES: 18: input, "{{.*}}libsycl-complex-fp64.o", object +// CHK-PHASES: 19: clang-offload-unbundler, {18}, object +// CHK-PHASES: 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// CHK-PHASES: 21: input, "{{.*}}libsycl-cmath.o", object +// CHK-PHASES: 22: clang-offload-unbundler, {21}, object +// CHK-PHASES: 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// CHK-PHASES: 24: input, "{{.*}}libsycl-cmath-fp64.o", object +// CHK-PHASES: 25: clang-offload-unbundler, {24}, object +// CHK-PHASES: 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// CHK-PHASES: 27: input, "{{.*}}libsycl-imf.o", object +// CHK-PHASES: 28: clang-offload-unbundler, {27}, object +// CHK-PHASES: 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// CHK-PHASES: 30: input, "{{.*}}libsycl-imf-fp64.o", object +// CHK-PHASES: 31: clang-offload-unbundler, {30}, object +// CHK-PHASES: 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// CHK-PHASES: 33: input, "{{.*}}libsycl-fallback-cassert.o", object +// CHK-PHASES: 34: clang-offload-unbundler, {33}, object +// CHK-PHASES: 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// CHK-PHASES: 36: input, "{{.*}}libsycl-fallback-cstring.o", object +// 
CHK-PHASES: 37: clang-offload-unbundler, {36}, object +// CHK-PHASES: 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// CHK-PHASES: 39: input, "{{.*}}libsycl-fallback-complex.o", object +// CHK-PHASES: 40: clang-offload-unbundler, {39}, object +// CHK-PHASES: 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// CHK-PHASES: 42: input, "{{.*}}libsycl-fallback-complex-fp64.o", object +// CHK-PHASES: 43: clang-offload-unbundler, {42}, object +// CHK-PHASES: 44: offload, " (nvptx64-nvidia-cuda)" {43}, object +// CHK-PHASES: 45: input, "{{.*}}libsycl-fallback-cmath.o", object +// CHK-PHASES: 46: clang-offload-unbundler, {45}, object +// CHK-PHASES: 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// CHK-PHASES: 48: input, "{{.*}}libsycl-fallback-cmath-fp64.o", object +// CHK-PHASES: 49: clang-offload-unbundler, {48}, object +// CHK-PHASES: 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// CHK-PHASES: 51: input, "{{.*}}libsycl-fallback-imf.o", object +// CHK-PHASES: 52: clang-offload-unbundler, {51}, object +// CHK-PHASES: 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// CHK-PHASES: 54: input, "{{.*}}libsycl-fallback-imf-fp64.o", object +// CHK-PHASES: 55: clang-offload-unbundler, {54}, object +// CHK-PHASES: 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// CHK-PHASES: 57: input, "{{.*}}libsycl-itt-user-wrappers.o", object +// CHK-PHASES: 58: clang-offload-unbundler, {57}, object +// CHK-PHASES: 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// CHK-PHASES: 60: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object +// CHK-PHASES: 61: clang-offload-unbundler, {60}, object +// CHK-PHASES: 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// CHK-PHASES: 63: input, "{{.*}}libsycl-itt-stubs.o", object +// CHK-PHASES: 64: clang-offload-unbundler, {63}, object +// CHK-PHASES: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// CHK-PHASES: 66: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 67: input, 
"{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_35) +// CHK-PHASES: 69: sycl-post-link, {68}, ir, (device-sycl, sm_35) +// CHK-PHASES: 70: file-table-tform, {69}, ir, (device-sycl, sm_35) +// CHK-PHASES: 71: backend, {70}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 72: assembler, {71}, object, (device-sycl, sm_35) +// CHK-PHASES: 73: linker, {71, 72}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 74: foreach, {70, 73}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 75: file-table-tform, {69, 74}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 76: clang-offload-wrapper, {75}, object, (device-sycl, sm_35) +// CHK-PHASES: 77: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {76}, image /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 44a3bc186a48f..bec378d428511 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -1,4 +1,3 @@ -assert/__assert_fail.cl reflect.ll atomic/loadstore_helpers.ll cl_khr_int64_extended_atomics/minmax_helpers.ll diff --git a/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl b/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl deleted file mode 100644 index 8a821ad16bf97..0000000000000 --- a/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl +++ /dev/null @@ -1,14 +0,0 @@ -#include - -void __assertfail(const char *__message, const char *__file, unsigned __line, - const char *__function, size_t __charSize); - -_CLC_DECL void __assert_fail(const char *expr, const char *file, - unsigned int line, const char *func) { - __assertfail(expr, file, line, func, 1); -} - -_CLC_DECL void _wassert(const char *_Message, const char *_File, - unsigned 
_Line) { - __assertfail(_Message, _File, _Line, 0, 1); -} diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 5cce135eb9429..a653b54466027 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -32,6 +32,14 @@ set(compile_opts -sycl-std=2020 ) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") + list(APPEND compile_opts + "-fno-sycl-libspirv" + "-fno-bundle-offload-arch" + "-nocudalib") +endif() + if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index fba86584ffca7..d183a3118ab11 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -147,4 +147,4 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C_INLINE float atanhf(float x) { return __devicelib_atanhf(x); } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 351d760fc51e6..be68e7ab56389 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // All exported functions in math and complex device libraries are weak // reference. 
If users provide their own math or complex functions(with @@ -444,4 +444,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index 13cb984633920..94481bc640de1 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -8,7 +8,7 @@ #include "wrapper.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); @@ -64,4 +64,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line, __spirv_LocalInvocationId_z()); } #endif -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/device.h b/libdevice/device.h index 2c56794da0b13..0770d7d82d29a 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,7 @@ #define EXTERN_C #endif // __cplusplus -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ @@ -25,7 +25,7 @@ #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C #define DEVICE_EXTERN_C_INLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) #define __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index eb86baba931ae..006d2d5b8c57c 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,7 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include typedef struct { @@ -289,5 +289,5 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #endif 
// __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index b03a3409b7bf8..47bae9f54714a 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -99,3 +99,24 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // *die = 0xdead; } #endif // __SPIR__ + +#ifdef __NVPTX__ + +DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file, + unsigned __line, const char *__function, + size_t charSize); + +DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, + int32_t line, const char *func, + uint64_t gid0, uint64_t gid1, + uint64_t gid2, uint64_t lid0, + uint64_t lid1, uint64_t lid2) { + __assertfail(expr, file, line, func, 1); +} + +DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, + unsigned _Line) { + __assertfail(_Message, _File, _Line, 0, 1); +} + +#endif // __NVPTX__ diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index f3e1606a51dbf..de95639613b42 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -149,4 +149,4 @@ DEVICE_EXTERN_C_INLINE double __devicelib_scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 60f4208f981c0..46fd3326385d6 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in
llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -166,4 +166,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index 4410ff238720e..bebfc621857d7 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,7 @@ #include "wrapper.h" #include -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { @@ -202,4 +202,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) { return head_cmp; } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index eab02e7a860be..640f0dbf3cd9e 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,7 +11,7 @@ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include #include @@ -51,5 +51,10 @@ DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { return __spirv_BuiltInLocalInvocationId.z; } +#ifndef __SPIR__ +const size_t_vec __spirv_BuiltInGlobalInvocationId{}; +const size_t_vec __spirv_BuiltInLocalInvocationId{}; #endif // __SPIR__ + +#endif // __SPIR__ || __NVPTX__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index 75d47f7f98afb..c3ec6ec1fa785 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,7 @@ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include #include @@ -29,5 +29,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2); -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #endif // 
__LIBDEVICE_WRAPPER_H__