diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index befb69ff22d49..08e50698d72ef 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -113,7 +113,7 @@ enum class OffloadArch { BMG_G21, LAST, - CudaDefault = OffloadArch::SM_52, + CudaDefault = OffloadArch::SM_75, HIPDefault = OffloadArch::GFX906, }; diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..c576bebd470dc --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 10000 + +// diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep 
b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..c576bebd470dc --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 10000 + +// diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc 
b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..558f2e2d02093 --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 7000 + +// diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc 
b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o b/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o new file mode 100644 index 0000000000000..bfac6d6de63cc Binary files /dev/null and b/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o differ diff --git a/clang/test/Driver/cuda-detect.cu b/clang/test/Driver/cuda-detect.cu index 23b6ba2fcc09d..66e1a25e70eda 100644 --- a/clang/test/Driver/cuda-detect.cu +++ b/clang/test/Driver/cuda-detect.cu @@ -60,7 +60,7 @@ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30 +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ @@ -96,14 +96,14 @@ // Verify that -nocudainc prevents adding include path to CUDA headers. 
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35 -// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35 +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 // We should not add any CUDA include paths if there's no valid CUDA installation // RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ @@ -123,10 +123,10 @@ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE // Verify that -nocudalib prevents linking libdevice bitcode in. -// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON @@ -152,10 +152,10 @@ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix CUDA80 -// Verify that if no version file is found, we report the default of 7.0. +// Verify that the version is read from CUDA_VERSION in include/cuda.h (10000 -> 10.0). 
// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ -// RUN: | FileCheck %s -check-prefix CUDA70 +// RUN: | FileCheck %s -check-prefix CUDA100 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda // NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda @@ -174,6 +174,7 @@ // LIBDEVICE50-SAME: libdevice.compute_50.10.bc // PTX42-SAME: "-target-feature" "+ptx42" // PTX60-SAME: "-target-feature" "+ptx60" +// PTX63-SAME: "-target-feature" "+ptx63" // CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h" // NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h" // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include" @@ -188,14 +189,20 @@ // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8" // CHECK-CXXINCLUDE: ld{{.*}}" +// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA70-SAME: -target-sdk-version=7.0 +// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CUDA70-SAME: -target-sdk-version=7.0 +// CUDA70: ld{{.*}}" + // CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda" // CUDA80-SAME: -target-sdk-version=8.0 // CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu" // CUDA80-SAME: -target-sdk-version=8.0 // CUDA80: ld{{.*}}" -// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// CUDA70-SAME: -target-sdk-version=7.0 -// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CUDA70-SAME: -target-sdk-version=7.0 -// CUDA70: ld{{.*}}" +// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA100-SAME: -target-sdk-version=10.0 +// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CUDA100-SAME: -target-sdk-version=10.0 +// CUDA100: ld{{.*}}" diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu index ea808f2302fbb..adad6dfe632d3 100644 --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ 
b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -2,14 +2,14 @@ // -fgpu-flush-denormals-to-zero. This should be translated to // -fdenormal-fp-math-f32=preserve-sign -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test alias options -f[no-]cuda-flush-denormals-to-zero -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c 
-march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test explicit argument, with CUDA offload kind // RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu index 2dbb9cdf6f589..7684b1df0d685 100644 --- a/clang/test/Driver/cuda-march.cu +++ b/clang/test/Driver/cuda-march.cu @@ -5,12 +5,12 @@ // RUN: %clang -### --target=x86_64-linux-gnu -c \ // RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s // RUN: %clang -### --target=x86_64-linux-gnu -c \ -// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s +// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s // CHECK: "-cc1"{{.*}} "-triple" "nvptx -// CHECK-SAME: "-target-cpu" "sm_52" +// CHECK-SAME: "-target-cpu" "sm_75" // CHECK: ptxas -// CHECK-SAME: "--gpu-name" "sm_52" +// CHECK-SAME: "--gpu-name" "sm_75" // CHECK: "-cc1"{{.*}} "-target-cpu" "haswell" diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index fc8e83a2bb279..312556707ef19 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -104,12 +104,12 @@ // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X -// we default to sm_52 -- same as if no --cuda-gpu-arch were passed. +// we default to sm_75 -- same as if no --cuda-gpu-arch were passed. 
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \ // RUN: --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s +// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ @@ -193,6 +193,8 @@ // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60" // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70" // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70" +// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75" +// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75" // ARCHALLERROR: error: unsupported CUDA gpu architecture: all // Match device-side preprocessor and compiler phases with -save-temps. diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu index f36dcc94558f1..7027984d07b2e 100644 --- a/clang/test/Driver/cuda-ptxas-path.cu +++ b/clang/test/Driver/cuda-ptxas-path.cu @@ -1,8 +1,8 @@ // RUN: %clang -### --target=i386-unknown-linux \ -// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: --ptxas-path=/some/path/to/ptxas %s 2>&1 \ // RUN: | FileCheck %s // CHECK-NOT: "ptxas" // CHECK: "/some/path/to/ptxas" -// CHECK-SAME: "--gpu-name" "sm_52" +// CHECK-SAME: "--gpu-name" "sm_75" diff --git a/clang/test/Driver/cuda-short-ptr.cu b/clang/test/Driver/cuda-short-ptr.cu index e0ae4505e0b56..bf3c1c168b922 100644 --- a/clang/test/Driver/cuda-short-ptr.cu +++ b/clang/test/Driver/cuda-short-ptr.cu @@ -1,6 +1,6 @@ // Checks that cuda compilation does the right thing when passed -fcuda-short-ptr -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib 
--cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s // CHECK: "-mllvm" "--nvptx-short-ptr" // CHECK-SAME: "-fcuda-short-ptr" diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu index 9eceb928ffabd..4b43012b39483 100644 --- a/clang/test/Driver/cuda-version-check.cu +++ b/clang/test/Driver/cuda-version-check.cu @@ -1,4 +1,4 @@ -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK @@ -15,33 +15,33 @@ // RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX -// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60. -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60. +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // This should only complain about sm_60, not sm_35. 
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35 // We should get two errors here, one for sm_60 and one for sm_61. // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61 // We should still get an error if we pass -nocudainc, because this compilation // would invoke ptxas, and we do a version check on that, too. -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // If with -nocudainc and -E, we don't touch the CUDA install, so we // shouldn't get an error. // RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // --no-cuda-version-check should suppress all of these errors. -// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \ // RUN: --no-cuda-version-check %s | \ // RUN: FileCheck %s --check-prefix=OK @@ -49,9 +49,9 @@ // therefore we should not get an error in host-only mode. 
We use the -S here // to avoid the error being produced in case by the assembler tool, which does // the same check. -// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \ +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // OK-NOT: error: GPU arch diff --git a/clang/test/Driver/cuda-windows.cu b/clang/test/Driver/cuda-windows.cu index 4459e809072d9..4b28117b62524 100644 --- a/clang/test/Driver/cuda-windows.cu +++ b/clang/test/Driver/cuda-windows.cu @@ -1,6 +1,6 @@ -// RUN: %clang -v --target=i386-pc-windows-msvc \ +// RUN: %clang -v --target=i386-pc-windows-msvc --cuda-gpu-arch=sm_50 \ // RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s -// RUN: %clang -v --target=i386-pc-windows-mingw32 \ +// RUN: %clang -v --target=i386-pc-windows-mingw32 --cuda-gpu-arch=sm_50 \ // RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0 diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu index 596e6cfe07379..e4a773b487c6c 100644 --- a/clang/test/Driver/lto.cu +++ b/clang/test/Driver/lto.cu @@ -26,7 +26,7 @@ // llvm-bc and llvm-ll outputs need to match regular suffixes // (unfortunately). 
-// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t +// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s // // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu" diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp index 066f526cba9ae..a70f6a3ded174 100644 --- a/clang/tools/clang-repl/ClangRepl.cpp +++ b/clang/tools/clang-repl/ClangRepl.cpp @@ -297,7 +297,7 @@ int main(int argc, const char **argv) { CB.SetCudaSDK(CudaPath); if (OffloadArch.empty()) { - OffloadArch = "sm_35"; + OffloadArch = "sm_75"; } CB.SetOffloadArch(OffloadArch); diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index f11d331862081..0ce1df8ce5a42 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -228,9 +228,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88 : hasArchAccelFeatures(); } - // If the user did not provide a target we default to the `sm_30` target. + // If the user did not provide a target we default to the `sm_75` target. std::string getTargetName() const { - return TargetName.empty() ? "sm_30" : TargetName; + return TargetName.empty() ? 
"sm_75" : TargetName; } bool hasTargetName() const { return !TargetName.empty(); } diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll index dd9a472984c25..b5fe715fe6b4d 100644 --- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll +++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s -; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s -; RUN: %if ptxas %{ llc < %s | %ptxas-verify %} -; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s | %ptxas-verify %} +; RUN: llc < %s -mcpu=sm_30 | FileCheck -check-prefix=ENABLED %s +; RUN: llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | FileCheck -check-prefix=DISABLED %s +; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %} +; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | %ptxas-verify %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index cdbbabe3e3b05..36efc809a4b66 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64-- -mcpu=sm_30 2>&1 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- -mcpu=sm_30 | %ptxas-verify %} define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-LABEL: srem_i128( diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index 625c93c3f0a53..0db99bd1eecfc 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -1,8 
+1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 +; RUN: llc < %s -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 ; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16 ; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16 -; RUN: %if ptxas %{ llc < %s | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %} ; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} ; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %} diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index fa42481016540..0dc9620a34ec6 100644 --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | %ptxas-verify %} ; CHECK: .target sm_30, debug diff --git a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml index 703c93b24dcc0..0b535627eee2b 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml +++ b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml @@ -20,11 +20,11 @@ Members: - Key: "triple" Value: "nvptx64-nvidia-cuda" - Key: "arch" - Value: "sm_52" + Value: "sm_75" - ImageKind: IMG_None OffloadKind: OFK_None String: - Key: "triple" Value: "nvptx64-nvidia-cuda" - Key: "arch" - Value: "sm_70" + Value: "sm_75" diff --git a/llvm/test/tools/llvm-objdump/Offloading/coff.test 
b/llvm/test/tools/llvm-objdump/Offloading/coff.test index 022277d137bd4..4ac92d830078b 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/coff.test +++ b/llvm/test/tools/llvm-objdump/Offloading/coff.test @@ -31,12 +31,12 @@ symbols: # CHECK-EMPTY: # CHECK-NEXT:OFFLOADING IMAGE [2]: # CHECK-NEXT:kind cubin -# CHECK-NEXT:arch sm_52 +# CHECK-NEXT:arch sm_75 # CHECK-NEXT:triple nvptx64-nvidia-cuda # CHECK-NEXT:producer openmp # CHECK-EMPTY: # CHECK-NEXT:OFFLOADING IMAGE [3]: # CHECK-NEXT:kind -# CHECK-NEXT:arch sm_70 +# CHECK-NEXT:arch sm_75 # CHECK-NEXT:triple nvptx64-nvidia-cuda # CHECK-NEXT:producer none diff --git a/llvm/test/tools/llvm-objdump/Offloading/elf.test b/llvm/test/tools/llvm-objdump/Offloading/elf.test index 10182aeb856cd..9fbed4be8d99b 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/elf.test +++ b/llvm/test/tools/llvm-objdump/Offloading/elf.test @@ -40,12 +40,12 @@ Sections: # CHECK-EMPTY: # CHECK-NEXT:OFFLOADING IMAGE [2]: # CHECK-NEXT:kind cubin -# CHECK-NEXT:arch sm_52 +# CHECK-NEXT:arch sm_75 # CHECK-NEXT:triple nvptx64-nvidia-cuda # CHECK-NEXT:producer openmp # CHECK-EMPTY: # CHECK-NEXT:OFFLOADING IMAGE [3]: # CHECK-NEXT:kind -# CHECK-NEXT:arch sm_70 +# CHECK-NEXT:arch sm_75 # CHECK-NEXT:triple nvptx64-nvidia-cuda # CHECK-NEXT:producer none diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h index 34c85de3418ec..f27c9048c63e9 100644 --- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h @@ -28,11 +28,11 @@ struct GPUToNVVMPipelineOptions llvm::cl::init("nvptx64-nvidia-cuda")}; PassOptions::Option cubinChip{ *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."), - llvm::cl::init("sm_50")}; + llvm::cl::init("sm_75")}; PassOptions::Option cubinFeatures{ *this, "cubin-features", llvm::cl::desc("Features to use to serialize to cubin."), - llvm::cl::init("+ptx60")}; + llvm::cl::init("+ptx63")}; 
PassOptions::Option cubinFormat{ *this, "cubin-format", llvm::cl::desc("Compilation format to use to serialize to cubin."), diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 0c8a0c7a677ab..5a3e50cb1b6b4 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -143,10 +143,10 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> { /*default=*/ "\"nvptx64-nvidia-cuda\"", "Target triple.">, Option<"chip", "chip", "std::string", - /*default=*/"\"sm_50\"", + /*default=*/"\"sm_75\"", "Target chip.">, Option<"features", "features", "std::string", - /*default=*/"\"+ptx60\"", + /*default=*/"\"+ptx63\"", "Target features.">, Option<"optLevel", "O", "unsigned", /*default=*/"2", diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index c4eb4872af2c6..d323aa08274cc 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -5849,8 +5849,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target", let parameters = (ins DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple, - StringRefParameter<"Target chip.", "\"sm_50\"">:$chip, - StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features, + StringRefParameter<"Target chip.", "\"sm_75\"">:$chip, + StringRefParameter<"Target chip features.", "\"+ptx63\"">:$features, OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link, DefaultValuedParameter<"bool", "true", "Perform SM version check on Ops.">:$verifyTarget @@ -5861,8 +5861,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target", let builders = [ AttrBuilder<(ins CArg<"int", "2">:$optLevel, CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple, - 
CArg<"StringRef", "\"sm_50\"">:$chip, - CArg<"StringRef", "\"+ptx60\"">:$features, + CArg<"StringRef", "\"sm_75\"">:$chip, + CArg<"StringRef", "\"+ptx63\"">:$features, CArg<"DictionaryAttr", "nullptr">:$targetFlags, CArg<"ArrayAttr", "nullptr">:$linkFiles, CArg<"bool", "true">:$verifyTarget), [{ diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 1a009b7dfa30d..ba1746c402743 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -51,10 +51,10 @@ def testObjectAttr(): print(o) object = ( - b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50" + b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.3\n.target sm_75" ) o = gpu.ObjectAttr.get(target, format, object) - # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50"> + # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.3\0A.target sm_75"> print(o) assert o.object == object