Skip to content

Commit d77cf57

Browse files
[lit] Refactor available ptxas features (#154439)
ToT `lit` currently assumes that a given `ptxas` version supports all capabilities of prior `ptxas` releases. This approach was flexible enough to support the removal of 32-bit address compilation from `ptxas` in CUDA 12.1, but it struggles with the removal of Volta and prior compilation in CUDA 13.0. To deal with this, this PR refactors how `lit` defines the set of features available for a given `ptxas` version. It invokes `ptxas` not just to get its version, but also to get the list of supported SMs, supported PTX ISA versions, and support for 32-bit compilation. This approach should be flexible enough to deal with the changing support matrix of `ptxas` as it goes forward. One obvious downside is that this relies on parsing the `stdout` of `ptxas`, something that's inherently unstable. But, IMO, this is something that we can fix as needed.
1 parent d3520a6 commit d77cf57

File tree

174 files changed

+366
-304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

174 files changed

+366
-304
lines changed

llvm/test/CodeGen/NVPTX/access-non-generic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
33
; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
44
; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
5-
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
5+
; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
66
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
77

88
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

llvm/test/CodeGen/NVPTX/activemask.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -mtriple=nvptx64 -O2 -mcpu=sm_52 -mattr=+ptx62 | FileCheck %s
2-
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx62 | %ptxas-verify %}
2+
; RUN: %if ptxas-isa-6.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx62 | %ptxas-verify %}
33

44
declare i32 @llvm.nvvm.activemask()
55

llvm/test/CodeGen/NVPTX/addrspacecast-ptx64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -check-prefixes=NOPTRCONV
33
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | FileCheck %s -check-prefixes=PTRCONV
4-
; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
5-
; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify -arch=sm_90 %}
4+
; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
5+
; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify -arch=sm_90 %}
66

77
; ALL-LABEL: conv_shared_cluster_to_generic
88
define i32 @conv_shared_cluster_to_generic(ptr addrspace(7) %ptr) {

llvm/test/CodeGen/NVPTX/addrspacecast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32
22
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
33
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
4-
; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
4+
; RUN: %if ptxas-ptr32 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
55
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
66
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
77

llvm/test/CodeGen/NVPTX/alias.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -mattr=+ptx64 | FileCheck %s
2-
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -mattr=+ptx64 | %ptxas-verify %}
2+
; RUN: %if ptxas-isa-6.4 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -mattr=+ptx64 | %ptxas-verify %}
33

44
define i32 @a() { ret i32 0 }
55
@b = internal alias i32 (), ptr @a

llvm/test/CodeGen/NVPTX/annotations.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
3-
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
3+
; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
44
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
55

66
@texture = internal addrspace(1) global i64 0, align 8

llvm/test/CodeGen/NVPTX/applypriority.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| FileCheck --check-prefixes=CHECK-PTX64 %s
3-
; RUN: %if ptxas-11.4 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| %ptxas-verify -arch=sm_80 %}
3+
; RUN: %if ptxas-sm_80 && ptxas-isa-7.4 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| %ptxas-verify -arch=sm_80 %}
44

55
target triple = "nvptx64-nvidia-cuda"
66

llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s
3-
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
3+
; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
44
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
55

66
;; These tests should run for all targets

llvm/test/CodeGen/NVPTX/arithmetic-int.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
3-
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
3+
; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
44
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
55

66
;; These tests should run for all targets

llvm/test/CodeGen/NVPTX/async-copy.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s
22
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s
3-
; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4-
; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
3+
; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 && ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4+
; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
55

66
declare void @llvm.nvvm.cp.async.wait.group(i32)
77

0 commit comments

Comments
 (0)