Skip to content

[ValueTracking] Return true for AddrSpaceCast in canCreateUndefOrPoison #144686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5))
// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]]
// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]]
// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]]
// CHECK: if.then.i:
// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/as_type.cl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ int3 f8(char16 x) {
return __builtin_astype(x, int3);
}

//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
//CHECK: %[[cast:.*]] ={{.*}} addrspacecast ptr %[[x]] to ptr addrspace(1)
//CHECK: ret ptr addrspace(1) %[[cast]]
global int* addr_cast(int *x) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be
reversible (i.e. casting the result back to the original address space
should yield the original bit pattern).

Which address space casts are supported depends on the target. Unsupported
address space casts return :ref:`poison <poisonvalues>`.

Example:
""""""""

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7483,6 +7483,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
case Instruction::FCmp:
case Instruction::GetElementPtr:
return false;
case Instruction::AddrSpaceCast:
return true;
default: {
const auto *CE = dyn_cast<ConstantExpr>(Op);
if (isa<CastInst>(Op) || (CE && CE->isCast()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA
; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
; CHECK-NEXT: ret void
;
%1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s

; Check that `select B, true, C` isn't optimized to `or B, C`: the
; addrspacecast %asc.shared yields poison when the cast is invalid, and an
; `or` would let that poison reach the result even when B is true, whereas
; the select only observes C when B is false.
; In the expected output below, B is the negated @llvm.amdgcn.is.shared result
; and C is the null-compare of the addrspacecast to addrspace(3).
define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
; CHECK-LABEL: define i1 @not_fold_select(
; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]]
; CHECK-NEXT: ret i1 [[TMP4]]
;
entry:
; Cast %x to a plain `ptr` and query @llvm.amdgcn.is.shared on the result.
%asc.flat = addrspacecast ptr addrspace(1) %x to ptr
%is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
; The cast to addrspace(3) is only used when %is.shared is true; otherwise the
; select below picks null instead, so a poison cast result is never observed.
%asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
; The returned value is whether the selected addrspace(3) pointer is null.
%result = icmp eq ptr addrspace(3) %shared.addr, null
ret i1 %result
}