llvm · bratpiorka · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
@@ -113,7 +113,7 @@ enum class OffloadArch {
   BMG_G21,
   LAST,
 
-  CudaDefault = OffloadArch::SM_52,
+  CudaDefault = OffloadArch::SM_75,
   HIPDefault = OffloadArch::GFX906,
 };
 

diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 7000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
diff --git a/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o b/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o
diff --git a/clang/test/Driver/cuda-detect.cu b/clang/test/Driver/cuda-detect.cu
@@ -60,7 +60,7 @@
 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
 // RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON \
@@ -96,14 +96,14 @@
 
 
 // Verify that -nocudainc prevents adding include path to CUDA headers.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
 
 // We should not add any CUDA include paths if there's no valid CUDA installation
 // RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
@@ -123,10 +123,10 @@
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
 
 // Verify that  -nocudalib prevents linking libdevice bitcode in.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON
 
@@ -152,10 +152,10 @@
 // RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix CUDA80
 
-// Verify that if no version file is found, we report the default of 7.0.
+// Verify that if no version file is found, we report 10.0 from cuda.h.
 // RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN:   | FileCheck %s -check-prefix CUDA70
+// RUN:   | FileCheck %s -check-prefix CUDA100
 
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
 // NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda
@@ -174,6 +174,7 @@
 // LIBDEVICE50-SAME: libdevice.compute_50.10.bc
 // PTX42-SAME: "-target-feature" "+ptx42"
 // PTX60-SAME: "-target-feature" "+ptx60"
+// PTX63-SAME: "-target-feature" "+ptx63"
 // CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
 // NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"
 // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
@@ -188,14 +189,20 @@
 // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
 // CHECK-CXXINCLUDE: ld{{.*}}"
 
+// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: ld{{.*}}"
+
 // CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CUDA80-SAME: -target-sdk-version=8.0
 // CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // CUDA80-SAME: -target-sdk-version=8.0
 // CUDA80: ld{{.*}}"
 
-// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: ld{{.*}}"
+// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: ld{{.*}}"
diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -2,14 +2,14 @@
 // -fgpu-flush-denormals-to-zero. This should be translated to
 // -fdenormal-fp-math-f32=preserve-sign
 
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test alias options -f[no-]cuda-flush-denormals-to-zero
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test explicit argument, with CUDA offload kind
 // RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s

diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu
@@ -5,12 +5,12 @@
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
 // RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
-// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s
+// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s
 
 // CHECK: "-cc1"{{.*}} "-triple" "nvptx
-// CHECK-SAME: "-target-cpu" "sm_52"
+// CHECK-SAME: "-target-cpu" "sm_75"
 
 // CHECK: ptxas
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
 
 // CHECK: "-cc1"{{.*}} "-target-cpu" "haswell"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
@@ -104,12 +104,12 @@
 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
 
 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-//    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
+//    we default to sm_75 -- same as if no --cuda-gpu-arch were passed.
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
 // RUN:   --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
 // RUN:   -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s
 
 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
@@ -193,6 +193,8 @@
 // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
 // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
 // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
+// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75"
+// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75"
 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
 
 // Match device-side preprocessor and compiler phases with -save-temps.

diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu
@@ -1,8 +1,8 @@
 // RUN: %clang -### --target=i386-unknown-linux \
-// RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN:   --ptxas-path=/some/path/to/ptxas %s 2>&1 \
 // RUN: | FileCheck %s
 
 // CHECK-NOT: "ptxas"
 // CHECK: "/some/path/to/ptxas"
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
diff --git a/clang/test/Driver/cuda-short-ptr.cu b/clang/test/Driver/cuda-short-ptr.cu
@@ -1,6 +1,6 @@
 // Checks that cuda compilation does the right thing when passed -fcuda-short-ptr
 
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s
 
 // CHECK: "-mllvm" "--nvptx-short-ptr"
 // CHECK-SAME: "-fcuda-short-ptr"
diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu
@@ -1,4 +1,4 @@
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
@@ -15,43 +15,43 @@
 // RUN:    --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX
 
-// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60.
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // This should only complain about sm_60, not sm_35.
 // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35
 
 // We should get two errors here, one for sm_60 and one for sm_61.
 // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61
 
 // We should still get an error if we pass -nocudainc, because this compilation
 // would invoke ptxas, and we do a version check on that, too.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // If with -nocudainc and -E, we don't touch the CUDA install, so we
 // shouldn't get an error.
 // RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // --no-cuda-version-check should suppress all of these errors.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \
 // RUN:    --no-cuda-version-check %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // We need to make sure the version check is done only for the device toolchain,
 // therefore we should not get an error in host-only mode. We use the -S here
 // to avoid the error being produced in case by the assembler tool, which does
 // the same check.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // OK-NOT: error: GPU arch

diff --git a/clang/test/Driver/cuda-windows.cu b/clang/test/Driver/cuda-windows.cu
@@ -1,6 +1,6 @@
-// RUN: %clang -v --target=i386-pc-windows-msvc \
+// RUN: %clang -v --target=i386-pc-windows-msvc --cuda-gpu-arch=sm_50 \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
-// RUN: %clang -v --target=i386-pc-windows-mingw32 \
+// RUN: %clang -v --target=i386-pc-windows-mingw32 --cuda-gpu-arch=sm_50 \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
 
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0

diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
@@ -26,7 +26,7 @@
 
 // llvm-bc and llvm-ll outputs need to match regular suffixes
 // (unfortunately).
-// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
+// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
 //
 // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"

diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp
@@ -297,7 +297,7 @@ int main(int argc, const char **argv) {
       CB.SetCudaSDK(CudaPath);
 
     if (OffloadArch.empty()) {
-      OffloadArch = "sm_35";
+      OffloadArch = "sm_75";
     }
     CB.SetOffloadArch(OffloadArch);
 

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -228,9 +228,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
     return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88
                                         : hasArchAccelFeatures();
   }
-  // If the user did not provide a target we default to the `sm_30` target.
+  // If the user did not provide a target we default to the `sm_75` target.
   std::string getTargetName() const {
-    return TargetName.empty() ? "sm_30" : TargetName;
+    return TargetName.empty() ? "sm_75" : TargetName;
   }
   bool hasTargetName() const { return !TargetName.empty(); }
 

diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s
-; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s
-; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
-; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s | %ptxas-verify %}
+; RUN: llc < %s -mcpu=sm_30 | FileCheck -check-prefix=ENABLED %s
+; RUN: llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | FileCheck -check-prefix=DISABLED %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s -mcpu=sm_30 | %ptxas-verify %}
 
 target triple = "nvptx64-nvidia-cuda"
 

diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}
+; RUN: llc < %s -mtriple=nvptx64-- -mcpu=sm_30 2>&1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- -mcpu=sm_30 | %ptxas-verify %}
 
 define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: srem_i128(

diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
+; RUN: llc < %s -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
 ; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
 ; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16
-; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_30 | %ptxas-verify %}
 ; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 ; RUN: %if ptxas-sm_80 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}
 

diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %}
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_30 | %ptxas-verify %}
 
 ; CHECK: .target sm_30, debug
 

diff --git a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml
@@ -20,11 +20,11 @@ Members:
     - Key:            "triple"
       Value:          "nvptx64-nvidia-cuda"
     - Key:            "arch"
-      Value:          "sm_52"
+      Value:          "sm_75"
   - ImageKind:      IMG_None
     OffloadKind:    OFK_None
     String:
     - Key:            "triple"
       Value:          "nvptx64-nvidia-cuda"
     - Key:            "arch"
-      Value:          "sm_70"
+      Value:          "sm_75"
diff --git a/llvm/test/tools/llvm-objdump/Offloading/coff.test b/llvm/test/tools/llvm-objdump/Offloading/coff.test
@@ -31,12 +31,12 @@ symbols:
 # CHECK-EMPTY:
 # CHECK-NEXT:OFFLOADING IMAGE [2]:
 # CHECK-NEXT:kind            cubin
-# CHECK-NEXT:arch            sm_52
+# CHECK-NEXT:arch            sm_75
 # CHECK-NEXT:triple          nvptx64-nvidia-cuda
 # CHECK-NEXT:producer        openmp
 # CHECK-EMPTY:
 # CHECK-NEXT:OFFLOADING IMAGE [3]:
 # CHECK-NEXT:kind            <none>
-# CHECK-NEXT:arch            sm_70
+# CHECK-NEXT:arch            sm_75
 # CHECK-NEXT:triple          nvptx64-nvidia-cuda
 # CHECK-NEXT:producer        none
diff --git a/llvm/test/tools/llvm-objdump/Offloading/elf.test b/llvm/test/tools/llvm-objdump/Offloading/elf.test
@@ -40,12 +40,12 @@ Sections:
 # CHECK-EMPTY:
 # CHECK-NEXT:OFFLOADING IMAGE [2]:
 # CHECK-NEXT:kind            cubin
-# CHECK-NEXT:arch            sm_52
+# CHECK-NEXT:arch            sm_75
 # CHECK-NEXT:triple          nvptx64-nvidia-cuda
 # CHECK-NEXT:producer        openmp
 # CHECK-EMPTY:
 # CHECK-NEXT:OFFLOADING IMAGE [3]:
 # CHECK-NEXT:kind            <none>
-# CHECK-NEXT:arch            sm_70
+# CHECK-NEXT:arch            sm_75
 # CHECK-NEXT:triple          nvptx64-nvidia-cuda
 # CHECK-NEXT:producer        none