-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NonZero prevents values from being const-propagated properly #51346
Comments
Looking through *** IR Dump After Value Propagation ***
; Function Attrs: uwtable
define i64 @_ZN13nonzero_usize16calculate_layout17h044f84d640378d0bE(i1 zeroext %c) unnamed_addr #0 {
start:
%spec.select.i = select i1 %c, i64 0, i64 8
br i1 %c, label %bb2, label %bb4
bb2: ; preds = %start
unreachable
bb4: ; preds = %start
%0 = sub i64 8, %spec.select.i
%1 = insertvalue { i64, i1 } undef, i64 %0, 0
%2 = insertvalue { i64, i1 } %1, i1 false, 1
%3 = extractvalue { i64, i1 } %2, 1
br i1 %3, label %panic, label %bb6, !prof !0
bb6: ; preds = %bb4
%4 = extractvalue { i64, i1 } %2, 0
ret i64 %4
panic: ; preds = %bb4
call ... ; elided
unreachable
}
*** IR Dump After Simplify the CFG ***
; Function Attrs: uwtable
define i64 @_ZN13nonzero_usize16calculate_layout17h044f84d640378d0bE(i1 zeroext %c) unnamed_addr #0 {
start:
%spec.select.i = select i1 %c, i64 0, i64 8
%0 = sub i64 8, %spec.select.i
%1 = insertvalue { i64, i1 } undef, i64 %0, 0
%2 = insertvalue { i64, i1 } %1, i1 false, 1
%3 = extractvalue { i64, i1 } %2, 1
br i1 %3, label %panic, label %bb6, !prof !0
bb6: ; preds = %start
%4 = extractvalue { i64, i1 } %2, 0
ret i64 %4
panic: ; preds = %start
call ... ; elided
unreachable
} SimplifyCFG is removing |
In the Before/after IR with module-level cruft removed: *** IR Dump After Infer set function attributes ***
; Function Attrs: inlinehint noreturn uwtable
define internal void @_ZN4core4hint21unreachable_unchecked17h5c82d720186d4847E() unnamed_addr #0 {
start:
unreachable
}
; Function Attrs: uwtable
define internal { i64, i64 } @_ZN13nonzero_usize6repeat17hff27c5667304426aE(i1 zeroext %c) unnamed_addr #1 {
start:
br i1 %c, label %bb1, label %bb2
bb1: ; preds = %start
br label %bb3
bb2: ; preds = %start
br label %bb3
bb3: ; preds = %bb2, %bb1
%_0.sroa.0.0 = phi i64 [ 0, %bb1 ], [ 1, %bb2 ]
%0 = insertvalue { i64, i64 } undef, i64 %_0.sroa.0.0, 0
%1 = insertvalue { i64, i64 } %0, i64 8, 1
ret { i64, i64 } %1
}
; Function Attrs: uwtable
define i64 @_ZN13nonzero_usize16calculate_layout17h044f84d640378d0bE(i1 zeroext %c) unnamed_addr #1 {
start:
%0 = call { i64, i64 } @_ZN13nonzero_usize6repeat17hff27c5667304426aE(i1 zeroext %c)
%.fca.0.extract = extractvalue { i64, i64 } %0, 0
%.fca.1.extract = extractvalue { i64, i64 } %0, 1
%switch = icmp ult i64 %.fca.0.extract, 1
br i1 %switch, label %bb2, label %bb4
bb2: ; preds = %start
call void @_ZN4core4hint21unreachable_unchecked17h5c82d720186d4847E()
unreachable
bb4: ; preds = %start
%1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 8, i64 %.fca.1.extract)
%2 = extractvalue { i64, i1 } %1, 0
%3 = extractvalue { i64, i1 } %1, 1
br i1 %3, label %panic, label %bb5, !prof !0
bb5: ; preds = %bb4
ret i64 %2
panic: ; preds = %bb4
call void ... ; elided
unreachable
}
*** IR Dump After Interprocedural Sparse Conditional Constant Propagation ***
; Function Attrs: inlinehint noreturn uwtable
define internal void @_ZN4core4hint21unreachable_unchecked17h5c82d720186d4847E() unnamed_addr #0 {
start:
unreachable
}
; Function Attrs: uwtable
define internal { i64, i64 } @_ZN13nonzero_usize6repeat17hff27c5667304426aE(i1 zeroext %c) unnamed_addr #1 {
start:
br i1 %c, label %bb1, label %bb2
bb1: ; preds = %start
br label %bb3
bb2: ; preds = %start
br label %bb3
bb3: ; preds = %bb2, %bb1
%_0.sroa.0.0 = phi i64 [ 0, %bb1 ], [ 1, %bb2 ]
%0 = insertvalue { i64, i64 } undef, i64 %_0.sroa.0.0, 0
%1 = insertvalue { i64, i64 } %0, i64 8, 1
ret { i64, i64 } %1
}
; Function Attrs: uwtable
define i64 @_ZN13nonzero_usize16calculate_layout17h044f84d640378d0bE(i1 zeroext %c) unnamed_addr #1 {
start:
%0 = call { i64, i64 } @_ZN13nonzero_usize6repeat17hff27c5667304426aE(i1 zeroext %c)
%.fca.0.extract = extractvalue { i64, i64 } %0, 0
%switch = icmp ult i64 %.fca.0.extract, 1
br i1 %switch, label %bb2, label %bb4
bb2: ; preds = %start
call void @_ZN4core4hint21unreachable_unchecked17h5c82d720186d4847E()
unreachable
bb4: ; preds = %start
%1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 8, i64 8)
%2 = extractvalue { i64, i1 } %1, 0
%3 = extractvalue { i64, i1 } %1, 1
br i1 %3, label %panic, label %bb5, !prof !0
bb5: ; preds = %bb4
ret i64 %2
panic: ; preds = %bb4
call void ... ; elided
unreachable
} |
@rkruppe Look at this version which is closer to the original code and produces the following LLVM IR for
These lines in particular highlight the problem:
In |
@rust-lang/wg-codegen |
@Amanieu Interesting, that one can't be handled by exploiting UB (but we should still track that other problem, it'll affect users of define i64 @foo(i64 %c) {
bb1:
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c, i64 8)
%overflow = extractvalue { i64, i1 } %mul, 1
%select = select i1 %overflow, i64 0, i64 8
br i1 %overflow, label %abort, label %bb2
bb2:
call void @dummy(i64 %select)
ret i64 %select
abort:
call void @abort()
unreachable
}
declare void @abort()
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
declare void @dummy(i64)
Removing the call to Curiously, it also gets optimized if the select is sunk into bb2. I assume that's because of this cheapskate test. Which makes me wonder why the definition isn't sunk into bb2 by any pass in the -O2 pipeline. |
Turns out there is a code sinking pass and |
Talked about this with @sunfishcode on IRC. Seems like there's no real reason for |
At least there should be an LLVM bug tracking this as well. |
I'm somewhat worried about the amount of code that could be affected by this considering how often |
@rkruppe Could we try enabling the sinking pass in rustc so that we can see what the performance impact looks like (positive or negative)? This seems like something that would benefit Rust much more than C/C++. |
AFAIK we still don't have good infrastructure for measuring the performance impact of a change on the run time of anything other than rustc itself. Without that, I'm not confident in our ability to check for regressions and quantify them.
Possibly, but there are more cases like that (e.g. range checks), and we've generally been hesitant to deviate too far from the established pass pipelines in those cases too. |
We did at some point make up a custom LLVM pass pipeline, so experimenting
with it is welcome.
…On Fri, Jul 6, 2018, 17:38 Robin Kruppe ***@***.***> wrote:
AFAIK we still don't have good infrastructure for measuring the
performance impact of a change on the run time of anything other than rustc
itself. Without that, I'm not confident in our ability to check for
regressions and quantify them.
This seems like something that would benefit Rust much more than C/C++
Possibly, but there's more cases like that (e.g. range checks) and we've
generally been hesitant to deviate too far from the established pass
pipelines in those cases too.
—
You are receiving this because you are on a team that was mentioned.
Reply to this email directly, view it on GitHub
<#51346 (comment)>,
or mute the thread
<https://github.com/notifications/unsubscribe-auth/AApc0hejmgQNcUVi3hhKadd064mUOEGBks5uD3Z-gaJpZM4UZOz8>
.
|
Three years later: the example given in the first post has optimized as expected since Rust 1.52 (Godbolt); however, the example from this comment still doesn't optimize away the |
…ock. This patch tries to sink instructions when they are only used in a successor block. This is a further enhancement patch based on Anna's commit: D109700, which allows sinking an instruction having multiple uses in a single user. In this patch, sink instructions with multiple users in a single successor block will be supported. It could fix a known issue from rust: rust-lang/rust#51346 (comment) Reviewed By: nikic, reames Differential Revision: https://reviews.llvm.org/D121585
Fix in llvm/llvm-project@0af3e6a |
@Amanieu could you help to check whether this issue can be closed? |
I can confirm that this fixes the problem. @nikic Do you think this is worth backporting or should we just wait for LLVM 15? |
…ock. This patch tries to sink instructions when they are only used in a successor block. This is a further enhancement patch based on Anna's commit: D109700, which allows sinking an instruction having multiple uses in a single user. In this patch, sink instructions with multiple users in a single successor block will be supported. It could fix a known issue from rust: rust-lang/rust#51346 (comment) Reviewed By: nikic, reames Differential Revision: https://reviews.llvm.org/D121585
We've upgraded to LLVM 15, which includes the fix. |
Found while tracking down the root cause of the slowdown in #51340. This is the minimized code which reproduces the performance issue (Playground link).
This code produces the following output:
Note that in practice, it is impossible for this function to return anything other than 0 (8 - 8) due to the call to unreachable_unchecked.
The version using usize does not suffer from this (Playground link):
The new code produces the following output:
cc @gnzlbg @rkruppe
The text was updated successfully, but these errors were encountered: