Skip to content

Commit 3e31569

Browse files
committed
DAG: Use correct pointer size for llvm.ptrmask
The llvm.ptrmask lowering was ignoring the pointer operand's address space and always used the default pointer type, so it would assert on address spaces whose pointer size differs from the default.
1 parent d19265b commit 3e31569

File tree

2 files changed

+137
-5
lines changed

2 files changed

+137
-5
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6837,11 +6837,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
6837 6837
SDValue Ptr = getValue(I.getOperand(0));
6838 6838
SDValue Const = getValue(I.getOperand(1));
6839 6839

6840-
EVT DestVT =
6841-
EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
6842-
6843-
setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
6844-
DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
6840+
EVT PtrVT = Ptr.getValueType();
6841+
setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
6842+
DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
6845 6843
return;
6846 6844
}
6847 6845
}
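(New file; its path was not captured in this extract. The contents are an llc/FileCheck regression test for the AMDGPU target.)
Lines changed: 134 additions & 0 deletions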
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
define i8 addrspace(1)* @v_ptrmask_global_variable_i64(i8 addrspace(1)* %ptr, i64 %mask) {
5+
; GCN-LABEL: v_ptrmask_global_variable_i64:
6+
; GCN: ; %bb.0:
7+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
9+
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
10+
; GCN-NEXT: s_setpc_b64 s[30:31]
11+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
12+
ret i8 addrspace(1)* %masked
13+
}
14+
15+
define i8 addrspace(1)* @v_ptrmask_global_variable_i32(i8 addrspace(1)* %ptr, i32 %mask) {
16+
; GCN-LABEL: v_ptrmask_global_variable_i32:
17+
; GCN: ; %bb.0:
18+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19+
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
20+
; GCN-NEXT: v_mov_b32_e32 v1, 0
21+
; GCN-NEXT: s_setpc_b64 s[30:31]
22+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
23+
ret i8 addrspace(1)* %masked
24+
}
25+
26+
define i8 addrspace(1)* @v_ptrmask_global_variable_i16(i8 addrspace(1)* %ptr, i16 %mask) {
27+
; GCN-LABEL: v_ptrmask_global_variable_i16:
28+
; GCN: ; %bb.0:
29+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30+
; GCN-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
31+
; GCN-NEXT: v_mov_b32_e32 v1, 0
32+
; GCN-NEXT: s_setpc_b64 s[30:31]
33+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
34+
ret i8 addrspace(1)* %masked
35+
}
36+
37+
define i8 addrspace(3)* @v_ptrmask_local_variable_i64(i8 addrspace(3)* %ptr, i64 %mask) {
38+
; GCN-LABEL: v_ptrmask_local_variable_i64:
39+
; GCN: ; %bb.0:
40+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41+
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
42+
; GCN-NEXT: s_setpc_b64 s[30:31]
43+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
44+
ret i8 addrspace(3)* %masked
45+
}
46+
47+
define i8 addrspace(3)* @v_ptrmask_local_variable_i32(i8 addrspace(3)* %ptr, i32 %mask) {
48+
; GCN-LABEL: v_ptrmask_local_variable_i32:
49+
; GCN: ; %bb.0:
50+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
52+
; GCN-NEXT: s_setpc_b64 s[30:31]
53+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
54+
ret i8 addrspace(3)* %masked
55+
}
56+
57+
define i8 addrspace(3)* @v_ptrmask_local_variable_i16(i8 addrspace(3)* %ptr, i16 %mask) {
58+
; GCN-LABEL: v_ptrmask_local_variable_i16:
59+
; GCN: ; %bb.0:
60+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61+
; GCN-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
62+
; GCN-NEXT: s_setpc_b64 s[30:31]
63+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
64+
ret i8 addrspace(3)* %masked
65+
}
66+
67+
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i64(i8 addrspace(1)* inreg %ptr, i64 inreg %mask) {
68+
; GCN-LABEL: s_ptrmask_global_variable_i64:
69+
; GCN: ; %bb.0:
70+
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
71+
; GCN-NEXT: ; return to shader part epilog
72+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
73+
ret i8 addrspace(1)* %masked
74+
}
75+
76+
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i32(i8 addrspace(1)* inreg %ptr, i32 inreg %mask) {
77+
; GCN-LABEL: s_ptrmask_global_variable_i32:
78+
; GCN: ; %bb.0:
79+
; GCN-NEXT: s_mov_b32 s5, 0
80+
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
81+
; GCN-NEXT: s_mov_b32 s1, 0
82+
; GCN-NEXT: ; return to shader part epilog
83+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
84+
ret i8 addrspace(1)* %masked
85+
}
86+
87+
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i16(i8 addrspace(1)* inreg %ptr, i16 inreg %mask) {
88+
; GCN-LABEL: s_ptrmask_global_variable_i16:
89+
; GCN: ; %bb.0:
90+
; GCN-NEXT: s_and_b32 s0, s4, 0xffff
91+
; GCN-NEXT: s_mov_b32 s1, 0
92+
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
93+
; GCN-NEXT: s_mov_b32 s1, 0
94+
; GCN-NEXT: ; return to shader part epilog
95+
%masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
96+
ret i8 addrspace(1)* %masked
97+
}
98+
99+
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i64(i8 addrspace(3)* inreg %ptr, i64 inreg %mask) {
100+
; GCN-LABEL: s_ptrmask_local_variable_i64:
101+
; GCN: ; %bb.0:
102+
; GCN-NEXT: s_and_b32 s0, s2, s3
103+
; GCN-NEXT: ; return to shader part epilog
104+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
105+
ret i8 addrspace(3)* %masked
106+
}
107+
108+
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i32(i8 addrspace(3)* inreg %ptr, i32 inreg %mask) {
109+
; GCN-LABEL: s_ptrmask_local_variable_i32:
110+
; GCN: ; %bb.0:
111+
; GCN-NEXT: s_and_b32 s0, s2, s3
112+
; GCN-NEXT: ; return to shader part epilog
113+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
114+
ret i8 addrspace(3)* %masked
115+
}
116+
117+
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i16(i8 addrspace(3)* inreg %ptr, i16 inreg %mask) {
118+
; GCN-LABEL: s_ptrmask_local_variable_i16:
119+
; GCN: ; %bb.0:
120+
; GCN-NEXT: s_and_b32 s0, 0xffff, s3
121+
; GCN-NEXT: s_and_b32 s0, s2, s0
122+
; GCN-NEXT: ; return to shader part epilog
123+
%masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
124+
ret i8 addrspace(3)* %masked
125+
}
126+
127+
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) #0
128+
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
129+
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) #0
130+
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0
131+
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)*, i32) #0
132+
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)*, i16) #0
133+
134+
attributes #0 = { nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)