Skip to content

Commit 274feef

Browse files
Reland "[NVPTX] Emit prmt selection value in hex" (llvm#115952)
Initially landed in 3ed4b0b. Reverted in 375d192 because the [`load-store.ll`](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/NVPTX/load-store.ll) test had not been updated to account for 5e75880. The test changes from 5e75880 are now accounted for in 7a99f23.
1 parent 5fa47d8 commit 274feef

File tree

7 files changed

+135
-114
lines changed

7 files changed

+135
-114
lines changed

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,12 @@ void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
373373
}
374374
}
375375

376+
void NVPTXInstPrinter::printHexu32imm(const MCInst *MI, int OpNum,
377+
raw_ostream &O, const char *Modifier) {
378+
int64_t Imm = MI->getOperand(OpNum).getImm();
379+
O << formatHex(Imm) << "U";
380+
}
381+
376382
void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
377383
raw_ostream &O, const char *Modifier) {
378384
const MCOperand &Op = MI->getOperand(OpNum);

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
4848
raw_ostream &O, const char *Modifier = nullptr);
4949
void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
5050
const char *Modifier = nullptr);
51+
void printHexu32imm(const MCInst *MI, int OpNum, raw_ostream &O,
52+
const char *Modifier = nullptr);
5153
void printProtoIdent(const MCInst *MI, int OpNum,
5254
raw_ostream &O, const char *Modifier = nullptr);
5355
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

+6-2
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,10 @@ multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
17401740
[(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
17411741
}
17421742

1743+
def Hexu32imm : Operand<i32> {
1744+
let PrintMethod = "printHexu32imm";
1745+
}
1746+
17431747
multiclass PRMT<ValueType T, RegisterClass RC> {
17441748
def rrr
17451749
: NVPTXInst<(outs RC:$d),
@@ -1748,12 +1752,12 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
17481752
[(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>;
17491753
def rri
17501754
: NVPTXInst<(outs RC:$d),
1751-
(ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode),
1755+
(ins RC:$a, Int32Regs:$b, Hexu32imm:$c, PrmtMode:$mode),
17521756
!strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
17531757
[(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>;
17541758
def rii
17551759
: NVPTXInst<(outs RC:$d),
1756-
(ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode),
1760+
(ins RC:$a, i32imm:$b, Hexu32imm:$c, PrmtMode:$mode),
17571761
!strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
17581762
[(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>;
17591763
}

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

+74-74
Large diffs are not rendered by default.

llvm/test/CodeGen/NVPTX/load-store.ll

+24-24
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ define void @generic_4xi8(ptr %a) {
175175
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
176176
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
177177
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
178-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
178+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
179179
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
180180
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
181181
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -184,8 +184,8 @@ define void @generic_4xi8(ptr %a) {
184184
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
185185
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
186186
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
187-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
188-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
187+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
188+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
189189
; CHECK-NEXT: st.u32 [%rd1], %r12;
190190
; CHECK-NEXT: ret;
191191
%a.load = load <4 x i8>, ptr %a
@@ -519,7 +519,7 @@ define void @generic_volatile_4xi8(ptr %a) {
519519
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
520520
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
521521
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
522-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
522+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
523523
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
524524
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
525525
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -528,8 +528,8 @@ define void @generic_volatile_4xi8(ptr %a) {
528528
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
529529
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
530530
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
531-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
532-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
531+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
532+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
533533
; CHECK-NEXT: st.volatile.u32 [%rd1], %r12;
534534
; CHECK-NEXT: ret;
535535
%a.load = load volatile <4 x i8>, ptr %a
@@ -1424,7 +1424,7 @@ define void @global_4xi8(ptr addrspace(1) %a) {
14241424
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
14251425
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
14261426
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
1427-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
1427+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
14281428
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
14291429
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
14301430
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -1433,8 +1433,8 @@ define void @global_4xi8(ptr addrspace(1) %a) {
14331433
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
14341434
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
14351435
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
1436-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
1437-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
1436+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
1437+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
14381438
; CHECK-NEXT: st.global.u32 [%rd1], %r12;
14391439
; CHECK-NEXT: ret;
14401440
%a.load = load <4 x i8>, ptr addrspace(1) %a
@@ -1749,7 +1749,7 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
17491749
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
17501750
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
17511751
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
1752-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
1752+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
17531753
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
17541754
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
17551755
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -1758,8 +1758,8 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
17581758
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
17591759
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
17601760
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
1761-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
1762-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
1761+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
1762+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
17631763
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12;
17641764
; CHECK-NEXT: ret;
17651765
%a.load = load volatile <4 x i8>, ptr addrspace(1) %a
@@ -2796,7 +2796,7 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
27962796
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
27972797
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
27982798
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
2799-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
2799+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
28002800
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
28012801
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
28022802
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -2805,8 +2805,8 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
28052805
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
28062806
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
28072807
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
2808-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
2809-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
2808+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
2809+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
28102810
; CHECK-NEXT: st.shared.u32 [%rd1], %r12;
28112811
; CHECK-NEXT: ret;
28122812
%a.load = load <4 x i8>, ptr addrspace(3) %a
@@ -3121,7 +3121,7 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
31213121
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
31223122
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
31233123
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
3124-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
3124+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
31253125
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
31263126
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
31273127
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -3130,8 +3130,8 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
31303130
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
31313131
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
31323132
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
3133-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
3134-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
3133+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
3134+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
31353135
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12;
31363136
; CHECK-NEXT: ret;
31373137
%a.load = load volatile <4 x i8>, ptr addrspace(3) %a
@@ -4026,7 +4026,7 @@ define void @local_4xi8(ptr addrspace(5) %a) {
40264026
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
40274027
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
40284028
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
4029-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
4029+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
40304030
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
40314031
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
40324032
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -4035,8 +4035,8 @@ define void @local_4xi8(ptr addrspace(5) %a) {
40354035
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
40364036
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
40374037
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
4038-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
4039-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
4038+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
4039+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
40404040
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
40414041
; CHECK-NEXT: ret;
40424042
%a.load = load <4 x i8>, ptr addrspace(5) %a
@@ -4351,7 +4351,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
43514351
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
43524352
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
43534353
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
4354-
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
4354+
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
43554355
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
43564356
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
43574357
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
@@ -4360,8 +4360,8 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
43604360
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
43614361
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
43624362
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
4363-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
4364-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
4363+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
4364+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
43654365
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
43664366
; CHECK-NEXT: ret;
43674367
%a.load = load volatile <4 x i8>, ptr addrspace(5) %a

llvm/test/CodeGen/NVPTX/sext-setcc.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
5757
; CHECK-NEXT: setp.eq.s16 %p4, %rs8, 0;
5858
; CHECK-NEXT: selp.s32 %r6, -1, 0, %p4;
5959
; CHECK-NEXT: selp.s32 %r7, -1, 0, %p3;
60-
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 13120;
60+
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
6161
; CHECK-NEXT: selp.s32 %r9, -1, 0, %p2;
6262
; CHECK-NEXT: selp.s32 %r10, -1, 0, %p1;
63-
; CHECK-NEXT: prmt.b32 %r11, %r10, %r9, 13120;
64-
; CHECK-NEXT: prmt.b32 %r12, %r11, %r8, 21520;
63+
; CHECK-NEXT: prmt.b32 %r11, %r10, %r9, 0x3340U;
64+
; CHECK-NEXT: prmt.b32 %r12, %r11, %r8, 0x5410U;
6565
; CHECK-NEXT: st.param.b32 [func_retval0], %r12;
6666
; CHECK-NEXT: ret;
6767
entry:
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,27 @@
1-
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
2-
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-FOUND
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
3+
4+
target triple = "nvptx64-unknown-unknown"
35

46
define void @kernel_func(ptr %in.vec, ptr %out.vec0) nounwind {
5-
entry:
7+
; CHECK-LABEL: kernel_func(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b32 %r<10>;
10+
; CHECK-EMPTY:
11+
; CHECK-NEXT: // %bb.0:
12+
; CHECK-NEXT: ld.param.u32 %r1, [kernel_func_param_0];
13+
; CHECK-NEXT: ld.u32 %r2, [%r1+8];
14+
; CHECK-NEXT: ld.u32 %r3, [%r1];
15+
; CHECK-NEXT: ld.u32 %r4, [%r1+24];
16+
; CHECK-NEXT: ld.u32 %r5, [%r1+16];
17+
; CHECK-NEXT: ld.param.u32 %r6, [kernel_func_param_1];
18+
; CHECK-NEXT: prmt.b32 %r7, %r5, %r4, 0x4000U;
19+
; CHECK-NEXT: prmt.b32 %r8, %r3, %r2, 0x40U;
20+
; CHECK-NEXT: prmt.b32 %r9, %r8, %r7, 0x7610U;
21+
; CHECK-NEXT: st.u32 [%r6], %r9;
22+
; CHECK-NEXT: ret;
623
%wide.vec = load <32 x i8>, ptr %in.vec, align 64
724
%vec0 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
825
store <4 x i8> %vec0, ptr %out.vec0, align 64
926
ret void
10-
11-
; CHECK-FOUND: prmt.b32 {{.*}} 16384;
12-
; CHECK-FOUND: prmt.b32 {{.*}} 64;
13-
; CHECK-FOUND: prmt.b32 {{.*}} 30224;
14-
15-
; CHECK: @kernel_func
16-
; CHECK-NOT: prmt.b32 {{.*}} -1;
17-
; CHECK: -- End function
1827
}

0 commit comments

Comments
 (0)