From e04a04d95fd575cd9317c453aaa0c180419df47e Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 10 Mar 2025 06:55:10 -0400 Subject: [PATCH 1/8] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking If we know that the initial GEP was inbounds, and we change it to a sequence of GEPs from the same base pointer where every offset is non-negative, then the new GEPs are inbounds. For SWDEV-516125. --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++++++++++---- .../AMDGPU/preserve-inbounds.ll | 23 +++++++++++++++++++ .../NVPTX/split-gep-and-gvn.ll | 16 ++++++------- .../NVPTX/split-gep.ll | 8 +++---- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index e048015298461..70940939be165 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } + bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds(); + // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. // Notice that we don't remove struct field indices here. If LowerGEP is @@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // and the old index if they are not used. RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); RecursivelyDeleteTriviallyDeadInstructions(OldIdx); + MayRecoverInbounds = + MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative(); } } } @@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // address with silently-wrapping two's complement arithmetic". // Therefore, the final code will be a semantically equivalent. // - // TODO(jingyue): do some range analysis to keep as many inbounds as - // possible. GEPs with inbounds are more friendly to alias analysis. - // TODO(gep_nowrap): Preserve nuw at least. - GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none(); - GEP->setNoWrapFlags(GEPNoWrapFlags::none()); + // If the initial GEP was inbounds and all variable indices and the + // accumulated offsets are non-negative, they can be added in any order and + // the intermediate results are in bounds. So, we can preserve the inbounds + // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis. + // + // TODO(gep_nowrap): Preserve nuw? + GEPNoWrapFlags NewGEPFlags = + MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none(); + GEP->setNoWrapFlags(NewGEPFlags); // Lowers a GEP to either GEPs with a single index or arithmetic operations. if (LowerGEP) { diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index 422e5d8215502..01619aa481ddd 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -16,3 +16,26 @@ entry: %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx ret ptr %arrayidx } + +; All offsets must be positive, so inbounds can be preserved. 
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) { +; CHECK-LABEL: @must_be_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4 +; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4 +; CHECK-NEXT: ret void +; +entry: + %i.prom = zext i32 %i to i64 + %idx = add nsw i64 %i.prom, 1 + %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx + %3 = load float, ptr %arrayidx.src, align 4 + %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx + store float %3, ptr %arrayidx.dst, align 4 + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 9a73feb2c4b5c..4474585bf9b06 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] @@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, 
i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 77b3434f4f159..da04a6e979425 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_explicit( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: @@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_long_idx( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: From b3caba7a296891cf32232e3e9cc784f0848b02d0 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 11 Mar 2025 10:05:26 -0400 Subject: [PATCH 2/8] Simplify test case and add more --- .../AMDGPU/preserve-inbounds.ll | 126 ++++++++++++++++-- 1 file changed, 112 insertions(+), 14 deletions(-) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index 01619aa481ddd..6865fcf663c58 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -17,25 +17,123 @@ entry: ret ptr 
%arrayidx } -; All offsets must be positive, so inbounds can be preserved. -define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) { +; All indices must be non-negative, so inbounds can be preserved. +define ptr @must_be_inbounds(ptr %p, i32 %i) { ; CHECK-LABEL: @must_be_inbounds( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]] -; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]] -; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4 -; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4 -; CHECK-NEXT: ret void +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] ; entry: %i.prom = zext i32 %i to i64 %idx = add nsw i64 %i.prom, 1 - %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx - %3 = load float, ptr %arrayidx.src, align 4 - %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx - store float %3, ptr %arrayidx.dst, align 4 - ret void + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; idx must be non-negative -> preserve inbounds +define ptr @sign_bit_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = and i64 %i, u0x7fffffffffffffff + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be negative -> don't preserve inbounds +define ptr @sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -256 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0xffffffffffffff00 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be 0 or very negative -> don't preserve inbounds +define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @only_sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0x8000000000000000 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; all indices non-negative -> preserve inbounds +define ptr @multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_nonnegative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; 
CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, 5 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; It doesn't matter that %idx2.add might be negative, the indices in the resulting GEPs are all non-negative -> preserve inbounds +define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; One index may be negative -> don't preserve inbounds +define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_not_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xffffffffffffff00 + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx } From 69fe986a2c3da7e27d8c51fdf87a0e2d07ced98c Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 21 Mar 2025 09:33:18 -0400 Subject: [PATCH 3/8] Use isKnownNonNegative, add available information to the SimplifyQuery --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 70940939be165..71a8a08f9aed6 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1082,7 +1082,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); RecursivelyDeleteTriviallyDeadInstructions(OldIdx); MayRecoverInbounds = - MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative(); + MayRecoverInbounds && + isKnownNonNegative( + NewIdx, + SimplifyQuery(*DL, TLI, DT, /*AC=*/nullptr, /*CXTI=*/GEP)); } } } From 99df66378ac345759307b8dc04fee3cf38636ed9 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 27 Mar 2025 05:00:29 -0400 Subject: [PATCH 4/8] 
Preserve more GEP flags based on NUW --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 94 +++- .../AMDGPU/preserve-inbounds.ll | 405 +++++++++++++++++- 2 files changed, 480 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 71a8a08f9aed6..25cf0f23d1908 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -235,8 +235,10 @@ class ConstantOffsetExtractor { /// \p GEP The given GEP /// \p UserChainTail Outputs the tail of UserChain so that we can /// garbage-collect unused instructions in UserChain. + /// \p PreservesNUW Outputs whether the extraction allows preserving the + /// GEP's nuw flag, if it has one. static Value *Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail); + User *&UserChainTail, bool &PreservesNUW); /// Looks for a constant offset from the given GEP index without extracting /// it. It returns the numeric value of the extracted constant offset (0 if @@ -267,6 +269,13 @@ class ConstantOffsetExtractor { APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, bool ZeroExtended); + /// A helper function to check if a subsequent call to rebuildWithoutConst + /// will allow preserving the GEP's nuw flag. That is the case if all + /// reassociated binary operations are add nuw and no non-nuw trunc is + /// distributed through an add. + /// Can only be called after find has populated the UserChain. + bool checkRebuildingPreservesNUW() const; + /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new /// index I' according to UserChain produced by function "find". @@ -676,6 +685,30 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) { return Current; } +bool ConstantOffsetExtractor::checkRebuildingPreservesNUW() const { + auto AllowsPreservingNUW = [](User *U) { + if (BinaryOperator *BO = dyn_cast(U)) { + auto Opcode = BO->getOpcode(); + if (Opcode == BinaryOperator::Or) { + // Ors are only considered here if they are disjoint. The addition that + // they represent in this case is NUW. + assert(cast(BO)->isDisjoint()); + return true; + } + return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap(); + } + // UserChain can only contain ConstantInt, CastInst, or BinaryOperator. + // Among the possible CastInsts, only trunc without nuw is a problem: If it + // is distributed through an add nuw, wrapping may occur: + // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" + if (TruncInst *TI = dyn_cast(U)) + return TI->hasNoUnsignedWrap(); + return true; + }; + + return all_of(UserChain, AllowsPreservingNUW); +} + Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() { distributeExtsAndCloneChain(UserChain.size() - 1); // Remove all nullptrs (used to be s/zext) from UserChain. @@ -779,7 +812,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { } Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail) { + User *&UserChainTail, + bool &PreservesNUW) { ConstantOffsetExtractor Extractor(GEP->getIterator()); // Find a non-zero constant offset first. 
APInt ConstantOffset = @@ -787,8 +821,12 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, GEP->isInBounds()); if (ConstantOffset == 0) { UserChainTail = nullptr; + PreservesNUW = true; return nullptr; } + + PreservesNUW = Extractor.checkRebuildingPreservesNUW(); + // Separates the constant offset from the GEP index. Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset(); UserChainTail = Extractor.UserChain.back(); @@ -1052,7 +1090,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } - bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds(); + // Track information for preserving GEP flags. + bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0; + bool AllNUWPreserved = true; // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. @@ -1072,8 +1112,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // uses the variadic part as the new index. Value *OldIdx = GEP->getOperand(I); User *UserChainTail; - Value *NewIdx = - ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail); + bool PreservesNUW; + Value *NewIdx = ConstantOffsetExtractor::Extract( + OldIdx, GEP, UserChainTail, PreservesNUW); if (NewIdx != nullptr) { // Switches to the index with the constant offset removed. GEP->setOperand(I, NewIdx); @@ -1081,11 +1122,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // and the old index if they are not used. RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); RecursivelyDeleteTriviallyDeadInstructions(OldIdx); - MayRecoverInbounds = - MayRecoverInbounds && - isKnownNonNegative( - NewIdx, - SimplifyQuery(*DL, TLI, DT, /*AC=*/nullptr, /*CXTI=*/GEP)); + AllOffsetsNonNegative = + AllOffsetsNonNegative && isKnownNonNegative(NewIdx, *DL); + AllNUWPreserved &= PreservesNUW; } } } @@ -1106,15 +1145,34 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // inbounds keyword is not present, the offsets are added to the base // address with silently-wrapping two's complement arithmetic". // Therefore, the final code will be a semantically equivalent. - // - // If the initial GEP was inbounds and all variable indices and the + GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none(); + + // If the initial GEP was inbounds/nusw and all variable indices and the // accumulated offsets are non-negative, they can be added in any order and - // the intermediate results are in bounds. So, we can preserve the inbounds - // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis. - // - // TODO(gep_nowrap): Preserve nuw? - GEPNoWrapFlags NewGEPFlags = - MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none(); + // the intermediate results are in bounds and don't overflow in a nusw sense. + // So, we can preserve the inbounds/nusw flag for both GEPs. + bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative; + + // If the initial GEP was NUW and all operations that we reassociate were NUW + // additions, the resulting GEPs are also NUW. + if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) { + NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap(); + // If the initial GEP additionally had NUSW (or inbounds, which implies + // NUSW), we know that the indices in the initial GEP must all have their + // signbit not set. For indices that are the result of NUW adds, the + // add-operands therefore also don't have their signbit set. 
Therefore, all + // indices of the resulting GEPs are non-negative -> we can preserve + // the inbounds/nusw flag. + CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap(); + } + + if (CanPreserveInBoundsNUSW) { + if (GEP->isInBounds()) + NewGEPFlags |= GEPNoWrapFlags::inBounds(); + else if (GEP->hasNoUnsignedSignedWrap()) + NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap(); + } + GEP->setNoWrapFlags(NewGEPFlags); // Lowers a GEP to either GEPs with a single index or arithmetic operations. diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index 6865fcf663c58..a4f3e2b913116 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=separate-const-offset-from-gep -S | FileCheck %s +; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s + +; gfx1200 is particularly interesting since it allows negative immediate offsets +; in flat instructions, so the transformation is applied in more cases. ; The inbounds flags cannot be preserved here: If the pointers point to the ; beginning of an object and %i is 1, the intermediate GEPs are out of bounds. @@ -137,3 +140,403 @@ entry: %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add ret ptr %arrayidx } + + +define ptr @nuw_implies_nuw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can preserve the flags. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Also poison. +define ptr @nuw_implies_nuw_nusw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_nusw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can do the transformation. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Check that nothing happens for non-disjoint ors +define ptr @or_no_disjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @or_no_disjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = or i64 [[I:%.*]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = or i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Can't preserved nuw and other flags here as distributing the trunc towards the +; leaves can introduce new wraps. +define ptr @nuw_inbounds_trunc(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +; trunc nuw is not a problem. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc nuw i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_sext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext_negative(ptr %p, i8 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX_CONV:%.*]] = zext i8 [[I:%.*]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX_CONV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 960 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i8 %i, -16 + %idx.conv = zext i8 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + + +; This test and the following ones mask most bits of %v off to facilitate +; validation with alive2 while still allowing interesting values. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V_MASKED:%.*]] = and i64 [[V:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V_MASKED]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested_negative(ptr %p, i64 %i, i64 %v) { +; 
CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_nested_not_all_nuw(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_nested_not_all_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add nuw i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_multilevel_not_all_nuw(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_multilevel_not_all_nuw( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Missing information about non-extracted indices does not matter. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[V:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %v + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[V:%.*]], i64 [[I1:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %v, i64 %idx1 + ret ptr %arrayidx +} From 56dbc9a86b4927518f011dc9f425076d083271cd Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 31 Mar 2025 07:01:23 -0400 Subject: [PATCH 5/8] Change some _negative tests to actually use negative numbers --- .../AMDGPU/preserve-inbounds.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index a4f3e2b913116..2a5b678e91fd8 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -394,12 +394,12 @@ define ptr @nuw_inbounds_implies_nuw_inbounds_nested_negative(ptr %p, i64 %i, i6 ; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 ; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 ; CHECK-NEXT: ret ptr [[ARRAYIDX3]] ; entry: %v.masked = and i64 %v, u0xf0000003c000000f - %idx1 = add nuw i64 %i, 1 + %idx1 = add nuw i64 %i, -16 %idx2 = add nuw i64 %idx1, %v.masked %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 ret ptr %arrayidx @@ -462,12 +462,12 @@ define ptr @nuw_nusw_implies_nuw_nusw_nested_negative(ptr %p, i64 %i, i64 %v) { ; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 ; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 ; CHECK-NEXT: ret ptr [[ARRAYIDX3]] ; entry: %v.masked = and i64 %v, u0xf0000003c000000f - %idx1 = add nuw i64 %i, 1 + %idx1 = add nuw i64 %i, -16 %idx2 = add nuw i64 %idx1, %v.masked %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 ret ptr %arrayidx From 871264cbb4bc43392e9611d432262f3e68ddd515 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 10 Apr 2025 08:24:19 -0400 Subject: [PATCH 6/8] Simplify the checkRebuildingPreservesNUW computation --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 57 ++++++++----------- 1 
file changed, 25 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 25cf0f23d1908..954a0e78404b8 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -269,13 +269,6 @@ class ConstantOffsetExtractor { APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, bool ZeroExtended); - /// A helper function to check if a subsequent call to rebuildWithoutConst - /// will allow preserving the GEP's nuw flag. That is the case if all - /// reassociated binary operations are add nuw and no non-nuw trunc is - /// distributed through an add. - /// Can only be called after find has populated the UserChain. - bool checkRebuildingPreservesNUW() const; - /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new /// index I' according to UserChain produced by function "find". @@ -685,30 +678,6 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) { return Current; } -bool ConstantOffsetExtractor::checkRebuildingPreservesNUW() const { - auto AllowsPreservingNUW = [](User *U) { - if (BinaryOperator *BO = dyn_cast(U)) { - auto Opcode = BO->getOpcode(); - if (Opcode == BinaryOperator::Or) { - // Ors are only considered here if they are disjoint. The addition that - // they represent in this case is NUW. - assert(cast(BO)->isDisjoint()); - return true; - } - return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap(); - } - // UserChain can only contain ConstantInt, CastInst, or BinaryOperator. - // Among the possible CastInsts, only trunc without nuw is a problem: If it - // is distributed through an add nuw, wrapping may occur: - // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" - if (TruncInst *TI = dyn_cast(U)) - return TI->hasNoUnsignedWrap(); - return true; - }; - - return all_of(UserChain, AllowsPreservingNUW); -} - Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() { distributeExtsAndCloneChain(UserChain.size() - 1); // Remove all nullptrs (used to be s/zext) from UserChain. @@ -811,6 +780,30 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { return NewBO; } +/// A helper function to check if reassociating through an entry in the user +/// chain would invalidate the GEP's nuw flag. +static bool allowsPreservingNUW(User *U) { + assert(isa(U) || isa(U) || isa(U)); + if (BinaryOperator *BO = dyn_cast(U)) { + // Binary operations needd to be effectively add nuw. + auto Opcode = BO->getOpcode(); + if (Opcode == BinaryOperator::Or) { + // Ors are only considered here if they are disjoint. The addition that + // they represent in this case is NUW. + assert(cast(BO)->isDisjoint()); + return true; + } + return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap(); + } + // UserChain can only contain ConstantInt, CastInst, or BinaryOperator. 
+ // Among the possible CastInsts, only trunc without nuw is a problem: If it + // is distributed through an add nuw, wrapping may occur: + // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" + if (TruncInst *TI = dyn_cast(U)) + return TI->hasNoUnsignedWrap(); + return true; +} + Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, User *&UserChainTail, bool &PreservesNUW) { @@ -825,7 +818,7 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, return nullptr; } - PreservesNUW = Extractor.checkRebuildingPreservesNUW(); + PreservesNUW = all_of(Extractor.UserChain, allowsPreservingNUW); // Separates the constant offset from the GEP index. Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset(); From dba2b315ef64a4fe28d11871b4f63a733c89a158 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 10 Apr 2025 10:06:25 -0400 Subject: [PATCH 7/8] Remove isa assert, fix comment typo --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 954a0e78404b8..3556a4d152f1c 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -783,9 +783,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { /// A helper function to check if reassociating through an entry in the user /// chain would invalidate the GEP's nuw flag. static bool allowsPreservingNUW(User *U) { - assert(isa(U) || isa(U) || isa(U)); if (BinaryOperator *BO = dyn_cast(U)) { - // Binary operations needd to be effectively add nuw. + // Binary operations need to be effectively add nuw. auto Opcode = BO->getOpcode(); if (Opcode == BinaryOperator::Or) { // Ors are only considered here if they are disjoint. The addition that @@ -801,7 +800,7 @@ static bool allowsPreservingNUW(User *U) { // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" if (TruncInst *TI = dyn_cast(U)) return TI->hasNoUnsignedWrap(); - return true; + return isa(U) || isa(U); } Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, From 7148a368cf74c0db1adabf62f8c60cccdf570cf0 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 14 Apr 2025 04:11:13 -0400 Subject: [PATCH 8/8] "User *U" -> "const User *U" --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 3556a4d152f1c..320b79203c0b3 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -782,8 +782,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { /// A helper function to check if reassociating through an entry in the user /// chain would invalidate the GEP's nuw flag. -static bool allowsPreservingNUW(User *U) { - if (BinaryOperator *BO = dyn_cast(U)) { +static bool allowsPreservingNUW(const User *U) { + if (const BinaryOperator *BO = dyn_cast(U)) { // Binary operations need to be effectively add nuw. 
     auto Opcode = BO->getOpcode();
     if (Opcode == BinaryOperator::Or) {
@@ -798,7 +798,7 @@ static bool allowsPreservingNUW(const User *U) {
   // Among the possible CastInsts, only trunc without nuw is a problem: If it
   // is distributed through an add nuw, wrapping may occur:
   // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"
-  if (TruncInst *TI = dyn_cast<TruncInst>(U))
+  if (const TruncInst *TI = dyn_cast<TruncInst>(U))
     return TI->hasNoUnsignedWrap();
   return isa<ConstantInt>(U) || isa<CastInst>(U);
 }
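
Illustrative note (a sketch, not part of the patches themselves): the net effect of this series on a split GEP can be seen in hand-written LLVM IR modeled on the @nuw_inbounds_implies_nuw_inbounds test in preserve-inbounds.ll. Before the pass:

  %idx = add nuw i64 %i, 1
  %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx

After separate-const-offset-from-gep with this series applied, both resulting GEPs keep the inbounds and nuw flags, because the reassociated add is nuw and the extracted constant offset is non-negative:

  %base = getelementptr inbounds nuw i32, ptr %p, i64 %i
  %arrayidx2 = getelementptr inbounds nuw i8, ptr %base, i64 4

The names %base and %arrayidx2 are placeholders; the actual pass output uses the numbered temporaries shown in the CHECK lines of the tests above.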