diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl index 62fd20c4d1414..f9d7968fc5570 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) -// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]] +// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]] // CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]] // CHECK: if.then.i: // CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16 diff --git a/clang/test/CodeGenOpenCL/as_type.cl b/clang/test/CodeGenOpenCL/as_type.cl index 2c6cdc3810b4d..33e34b03a5633 100644 --- a/clang/test/CodeGenOpenCL/as_type.cl +++ b/clang/test/CodeGenOpenCL/as_type.cl @@ -67,7 +67,7 @@ int3 f8(char16 x) { return __builtin_astype(x, int3); } -//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]]) +//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]]) //CHECK: %[[cast:.*]] ={{.*}} addrspacecast ptr %[[x]] to ptr addrspace(1) //CHECK: ret ptr addrspace(1) %[[cast]] global int* addr_cast(int *x) { diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index cc72a37f68599..f9ee3243eea9d 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be reversible (i.e. casting the result back to the original address space should yield the original bit pattern). +Which address space casts are supported depends on the target. Unsupported +address space casts return :ref:`poison `. + Example: """""""" diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index a17417cb5189c..13c9be794a9ab 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7483,6 +7483,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, case Instruction::FCmp: case Instruction::GetElementPtr: return false; + case Instruction::AddrSpaceCast: + return true; default: { const auto *CE = dyn_cast(Op); if (isa(Op) || (CE && CE->isCast())) diff --git a/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll b/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll index 9a5f789e88fa9..02c047f442af1 100644 --- a/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll +++ b/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA ; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr ; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false) +; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false) ; CHECK-NEXT: ret void ; %1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll new file mode 100644 index 0000000000000..0be7d5a04b2e5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s + +; Check that `select B, true, C` isn't optimized to `or B, C`, +; because the invalid addrspacecast %asc.shared introduces poison. +define i1 @not_fold_select(ptr addrspace(1) noundef %x) { +; CHECK-LABEL: define i1 @not_fold_select( +; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null +; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] +; + entry: + %asc.flat = addrspacecast ptr addrspace(1) %x to ptr + %is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat) + %asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3) + %shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null + %result = icmp eq ptr addrspace(3) %shared.addr, null + ret i1 %result +}