Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/OffloadArch.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ enum class OffloadArch {
BMG_G21,
LAST,

CudaDefault = OffloadArch::SM_52,
CudaDefault = OffloadArch::SM_75,
HIPDefault = OffloadArch::GFX906,
};

Expand Down
Empty file.
7 changes: 7 additions & 0 deletions clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//
// Placeholder file for testing CUDA version detection
//

#define CUDA_VERSION 10000

//
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//
// Placeholder file for testing CUDA version detection
//

#define CUDA_VERSION 10000

//
Empty file.
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//
// Placeholder file for testing CUDA version detection
//

#define CUDA_VERSION 7000

//
Empty file.
Empty file.
Binary file added clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o
Binary file not shown.
35 changes: 21 additions & 14 deletions clang/test/Driver/cuda-detect.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
Expand Down Expand Up @@ -96,14 +96,14 @@


// Verify that -nocudainc prevents adding include path to CUDA headers.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10

// We should not add any CUDA include paths if there's no valid CUDA installation
// RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
Expand All @@ -123,10 +123,10 @@
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE

// Verify that -nocudalib prevents linking libdevice bitcode in.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON

Expand All @@ -152,10 +152,10 @@
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix CUDA80

// Verify that if no version file is found, we report the default of 7.0.
// Verify that if no version file is found, we report the default of 10.0.
// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix CUDA70
// RUN: | FileCheck %s -check-prefix CUDA100

// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
// NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda
Expand All @@ -174,6 +174,7 @@
// LIBDEVICE50-SAME: libdevice.compute_50.10.bc
// PTX42-SAME: "-target-feature" "+ptx42"
// PTX60-SAME: "-target-feature" "+ptx60"
// PTX63-SAME: "-target-feature" "+ptx63"
// CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
// NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"
// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
Expand All @@ -188,14 +189,20 @@
// CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
// CHECK-CXXINCLUDE: ld{{.*}}"

// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA70-SAME: -target-sdk-version=7.0
// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CUDA70-SAME: -target-sdk-version=7.0
// CUDA70: ld{{.*}}"

// CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA80-SAME: -target-sdk-version=8.0
// CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CUDA80-SAME: -target-sdk-version=8.0
// CUDA80: ld{{.*}}"

// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA70-SAME: -target-sdk-version=7.0
// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CUDA70-SAME: -target-sdk-version=7.0
// CUDA70: ld{{.*}}"
// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA100-SAME: -target-sdk-version=10.0
// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CUDA100-SAME: -target-sdk-version=10.0
// CUDA100: ld{{.*}}"
8 changes: 4 additions & 4 deletions clang/test/Driver/cuda-flush-denormals-to-zero.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// -fgpu-flush-denormals-to-zero. This should be translated to
// -fdenormal-fp-math-f32=preserve-sign

// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s

// Test alias options -f[no-]cuda-flush-denormals-to-zero
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s

// Test explicit argument, with CUDA offload kind
// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
Expand Down
6 changes: 3 additions & 3 deletions clang/test/Driver/cuda-march.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
// RUN: %clang -### --target=x86_64-linux-gnu -c \
// RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s
// RUN: %clang -### --target=x86_64-linux-gnu -c \
// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s
// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s

// CHECK: "-cc1"{{.*}} "-triple" "nvptx
// CHECK-SAME: "-target-cpu" "sm_52"
// CHECK-SAME: "-target-cpu" "sm_75"

// CHECK: ptxas
// CHECK-SAME: "--gpu-name" "sm_52"
// CHECK-SAME: "--gpu-name" "sm_75"

// CHECK: "-cc1"{{.*}} "-target-cpu" "haswell"
6 changes: 4 additions & 2 deletions clang/test/Driver/cuda-options.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,12 @@
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s

// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
// we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
// we default to sm_75 -- same as if no --cuda-gpu-arch were passed.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
// RUN: --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
// RUN: -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s

// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
Expand Down Expand Up @@ -193,6 +193,8 @@
// NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
// ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
// NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75"
// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75"
// ARCHALLERROR: error: unsupported CUDA gpu architecture: all

// Match device-side preprocessor and compiler phases with -save-temps.
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/cuda-ptxas-path.cu
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %clang -### --target=i386-unknown-linux \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --ptxas-path=/some/path/to/ptxas %s 2>&1 \
// RUN: | FileCheck %s

// CHECK-NOT: "ptxas"
// CHECK: "/some/path/to/ptxas"
// CHECK-SAME: "--gpu-name" "sm_52"
// CHECK-SAME: "--gpu-name" "sm_75"
2 changes: 1 addition & 1 deletion clang/test/Driver/cuda-short-ptr.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Checks that cuda compilation does the right thing when passed -fcuda-short-ptr

// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s

// CHECK: "-mllvm" "--nvptx-short-ptr"
// CHECK-SAME: "-fcuda-short-ptr"
20 changes: 10 additions & 10 deletions clang/test/Driver/cuda-version-check.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
Expand All @@ -15,43 +15,43 @@
// RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX

// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60

// This should only complain about sm_60, not sm_35.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35

// We should get two errors here, one for sm_60 and one for sm_61.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61

// We should still get an error if we pass -nocudainc, because this compilation
// would invoke ptxas, and we do a version check on that, too.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60

// If with -nocudainc and -E, we don't touch the CUDA install, so we
// shouldn't get an error.
// RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK

// --no-cuda-version-check should suppress all of these errors.
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \
// RUN: --no-cuda-version-check %s | \
// RUN: FileCheck %s --check-prefix=OK

// We need to make sure the version check is done only for the device toolchain,
// therefore we should not get an error in host-only mode. We use the -S here
// to avoid the error being produced by the assembler tool, which does
// the same check.
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60

// OK-NOT: error: GPU arch
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/cuda-windows.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %clang -v --target=i386-pc-windows-msvc \
// RUN: %clang -v --target=i386-pc-windows-msvc --cuda-gpu-arch=sm_50 \
// RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
// RUN: %clang -v --target=i386-pc-windows-mingw32 \
// RUN: %clang -v --target=i386-pc-windows-mingw32 --cuda-gpu-arch=sm_50 \
// RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s

// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/lto.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

// llvm-bc and llvm-ll outputs need to match regular suffixes
// (unfortunately).
// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t
// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
//
// CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
Expand Down
2 changes: 1 addition & 1 deletion clang/tools/clang-repl/ClangRepl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ int main(int argc, const char **argv) {
CB.SetCudaSDK(CudaPath);

if (OffloadArch.empty()) {
OffloadArch = "sm_35";
OffloadArch = "sm_75";
}
CB.SetOffloadArch(OffloadArch);

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88
: hasArchAccelFeatures();
}
// If the user did not provide a target we default to the `sm_30` target.
// If the user did not provide a target we default to the `sm_75` target.
std::string getTargetName() const {
return TargetName.empty() ? "sm_30" : TargetName;
return TargetName.empty() ? "sm_75" : TargetName;
}
bool hasTargetName() const { return !TargetName.empty(); }

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s
; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s | %ptxas-verify %}
; RUN: llc < %s -mcpu=sm_30 | FileCheck -check-prefix=ENABLED %s
; RUN: llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | FileCheck -check-prefix=DISABLED %s
; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | %ptxas-verify %}

target triple = "nvptx64-nvidia-cuda"

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/i128.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64-- -mcpu=sm_30 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- -mcpu=sm_30 | %ptxas-verify %}

define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/math-intrins.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
; RUN: llc < %s -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | %ptxas-verify %}

; CHECK: .target sm_30, debug

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ Members:
- Key: "triple"
Value: "nvptx64-nvidia-cuda"
- Key: "arch"
Value: "sm_52"
Value: "sm_75"
- ImageKind: IMG_None
OffloadKind: OFK_None
String:
- Key: "triple"
Value: "nvptx64-nvidia-cuda"
- Key: "arch"
Value: "sm_70"
Value: "sm_75"
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-objdump/Offloading/coff.test
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ symbols:
# CHECK-EMPTY:
# CHECK-NEXT:OFFLOADING IMAGE [2]:
# CHECK-NEXT:kind cubin
# CHECK-NEXT:arch sm_52
# CHECK-NEXT:arch sm_75
# CHECK-NEXT:triple nvptx64-nvidia-cuda
# CHECK-NEXT:producer openmp
# CHECK-EMPTY:
# CHECK-NEXT:OFFLOADING IMAGE [3]:
# CHECK-NEXT:kind <none>
# CHECK-NEXT:arch sm_70
# CHECK-NEXT:arch sm_75
# CHECK-NEXT:triple nvptx64-nvidia-cuda
# CHECK-NEXT:producer none
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-objdump/Offloading/elf.test
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ Sections:
# CHECK-EMPTY:
# CHECK-NEXT:OFFLOADING IMAGE [2]:
# CHECK-NEXT:kind cubin
# CHECK-NEXT:arch sm_52
# CHECK-NEXT:arch sm_75
# CHECK-NEXT:triple nvptx64-nvidia-cuda
# CHECK-NEXT:producer openmp
# CHECK-EMPTY:
# CHECK-NEXT:OFFLOADING IMAGE [3]:
# CHECK-NEXT:kind <none>
# CHECK-NEXT:arch sm_70
# CHECK-NEXT:arch sm_75
# CHECK-NEXT:triple nvptx64-nvidia-cuda
# CHECK-NEXT:producer none
Loading