Skip to content

Commit d726c84

Browse files
committed
Auto merge of #103331 - nnethercote:convert-switch-to-br, r=scottmcm
Use `br` instead of `switch` in more cases. `codegen_switchint_terminator` already uses `br` instead of `switch` when there is one normal target plus the `otherwise` target. But there's another common case with two normal targets and an `otherwise` target that points to an empty unreachable BB. This comes up a lot when switching on the tags of enums that use niches. The pattern looks like this: ``` bb1: ; preds = %bb6 %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4 %4 = sub i8 %3, 2 %5 = icmp eq i8 %4, 0 %_6 = select i1 %5, i64 0, i64 1 switch i64 %_6, label %bb3 [ i64 0, label %bb4 i64 1, label %bb2 ] bb3: ; preds = %bb1 unreachable ``` This commit adds code to convert the `switch` to a `br`: ``` bb1: ; preds = %bb6 %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4 %4 = sub i8 %3, 2 %5 = icmp eq i8 %4, 0 %_6 = select i1 %5, i64 0, i64 1 %6 = icmp eq i64 %_6, 0 br i1 %6, label %bb4, label %bb2 bb3: ; No predecessors! unreachable ``` This has a surprisingly large effect on compile times, with reductions of 5% on debug builds of some crates. The reduction is all due to LLVM taking less time. Maybe LLVM is just much better at handling `br` than `switch`. The resulting code is still suboptimal. - The `icmp`, `select`, `icmp` sequence is silly, converting an `i1` to an `i64` and back to an `i1`. But with the current code structure it's hard to avoid, and LLVM will easily clean it up, in opt builds at least. - `bb3` is usually now truly dead code (though not always, so it can't be removed universally). r? `@scottmcm`
2 parents 77e57db + 003a3f8 commit d726c84

File tree

5 files changed

+116
-31
lines changed

5 files changed

+116
-31
lines changed

compiler/rustc_codegen_ssa/src/mir/block.rs

+28-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use rustc_middle::mir::{self, AssertKind, SwitchTargets};
1717
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
1818
use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
1919
use rustc_middle::ty::{self, Instance, Ty, TypeVisitable};
20+
use rustc_session::config::OptLevel;
2021
use rustc_span::source_map::Span;
2122
use rustc_span::{sym, Symbol};
2223
use rustc_symbol_mangling::typeid::typeid_for_fnabi;
@@ -286,12 +287,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
286287
assert_eq!(discr.layout.ty, switch_ty);
287288
let mut target_iter = targets.iter();
288289
if target_iter.len() == 1 {
289-
// If there are two targets (one conditional, one fallback), emit br instead of switch
290+
// If there are two targets (one conditional, one fallback), emit `br` instead of
291+
// `switch`.
290292
let (test_value, target) = target_iter.next().unwrap();
291293
let lltrue = helper.llbb_with_cleanup(self, target);
292294
let llfalse = helper.llbb_with_cleanup(self, targets.otherwise());
293295
if switch_ty == bx.tcx().types.bool {
294-
// Don't generate trivial icmps when switching on bool
296+
// Don't generate trivial icmps when switching on bool.
295297
match test_value {
296298
0 => bx.cond_br(discr.immediate(), llfalse, lltrue),
297299
1 => bx.cond_br(discr.immediate(), lltrue, llfalse),
@@ -303,6 +305,30 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
303305
let cmp = bx.icmp(IntPredicate::IntEQ, discr.immediate(), llval);
304306
bx.cond_br(cmp, lltrue, llfalse);
305307
}
308+
} else if self.cx.sess().opts.optimize == OptLevel::No
309+
&& target_iter.len() == 2
310+
&& self.mir[targets.otherwise()].is_empty_unreachable()
311+
{
312+
// In unoptimized builds, if there are two normal targets and the `otherwise` target is
313+
// an unreachable BB, emit `br` instead of `switch`. This leaves behind the unreachable
314+
// BB, which will usually (but not always) be dead code.
315+
//
316+
// Why only in unoptimized builds?
317+
// - In unoptimized builds LLVM uses FastISel which does not support switches, so it
318+
// must fall back to the to the slower SelectionDAG isel. Therefore, using `br` gives
319+
// significant compile time speedups for unoptimized builds.
320+
// - In optimized builds the above doesn't hold, and using `br` sometimes results in
321+
// worse generated code because LLVM can no longer tell that the value being switched
322+
// on can only have two values, e.g. 0 and 1.
323+
//
324+
let (test_value1, target1) = target_iter.next().unwrap();
325+
let (_test_value2, target2) = target_iter.next().unwrap();
326+
let ll1 = helper.llbb_with_cleanup(self, target1);
327+
let ll2 = helper.llbb_with_cleanup(self, target2);
328+
let switch_llty = bx.immediate_backend_type(bx.layout_of(switch_ty));
329+
let llval = bx.const_uint_big(switch_llty, test_value1);
330+
let cmp = bx.icmp(IntPredicate::IntEQ, discr.immediate(), llval);
331+
bx.cond_br(cmp, ll1, ll2);
306332
} else {
307333
bx.switch(
308334
discr.immediate(),

compiler/rustc_middle/src/mir/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,11 @@ impl<'tcx> BasicBlockData<'tcx> {
11861186
pub fn visitable(&self, index: usize) -> &dyn MirVisitable<'tcx> {
11871187
if index < self.statements.len() { &self.statements[index] } else { &self.terminator }
11881188
}
1189+
1190+
/// Does the block have no statements and an unreachable terminator?
1191+
pub fn is_empty_unreachable(&self) -> bool {
1192+
self.statements.is_empty() && matches!(self.terminator().kind, TerminatorKind::Unreachable)
1193+
}
11891194
}
11901195

11911196
impl<O> AssertKind<O> {

src/test/codegen/match-optimized.rs

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// compile-flags: -C no-prepopulate-passes -O
2+
3+
#![crate_type = "lib"]
4+
5+
pub enum E {
6+
A,
7+
B,
8+
C,
9+
}
10+
11+
// CHECK-LABEL: @exhaustive_match
12+
#[no_mangle]
13+
pub fn exhaustive_match(e: E) -> u8 {
14+
// CHECK: switch{{.*}}, label %[[OTHERWISE:[a-zA-Z0-9_]+]] [
15+
// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[A:[a-zA-Z0-9_]+]]
16+
// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[B:[a-zA-Z0-9_]+]]
17+
// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[C:[a-zA-Z0-9_]+]]
18+
// CHECK-NEXT: ]
19+
// CHECK: [[OTHERWISE]]:
20+
// CHECK-NEXT: unreachable
21+
//
22+
// CHECK: [[A]]:
23+
// CHECK-NEXT: store i8 0, {{i8\*|ptr}} %1, align 1
24+
// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]]
25+
// CHECK: [[B]]:
26+
// CHECK-NEXT: store i8 1, {{i8\*|ptr}} %1, align 1
27+
// CHECK-NEXT: br label %[[EXIT]]
28+
// CHECK: [[C]]:
29+
// CHECK-NEXT: store i8 2, {{i8\*|ptr}} %1, align 1
30+
// CHECK-NEXT: br label %[[EXIT]]
31+
match e {
32+
E::A => 0,
33+
E::B => 1,
34+
E::C => 2,
35+
}
36+
}
37+
38+
#[repr(u16)]
39+
pub enum E2 {
40+
A = 13,
41+
B = 42,
42+
}
43+
44+
// For optimized code we produce a switch with an unreachable target as the `otherwise` so LLVM
45+
// knows the possible values. Compare with `src/test/codegen/match-unoptimized.rs`.
46+
47+
// CHECK-LABEL: @exhaustive_match_2
48+
#[no_mangle]
49+
pub fn exhaustive_match_2(e: E2) -> u8 {
50+
// CHECK: switch i16 %{{.+}}, label %[[UNREACH:.+]] [
51+
// CHECK-NEXT: i16 13,
52+
// CHECK-NEXT: i16 42,
53+
// CHECK-NEXT: ]
54+
// CHECK: [[UNREACH]]:
55+
// CHECK-NEXT: unreachable
56+
match e {
57+
E2::A => 0,
58+
E2::B => 1,
59+
}
60+
}

src/test/codegen/match-unoptimized.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// compile-flags: -C no-prepopulate-passes -Copt-level=0
2+
3+
#![crate_type = "lib"]
4+
5+
#[repr(u16)]
6+
pub enum E2 {
7+
A = 13,
8+
B = 42,
9+
}
10+
11+
// For unoptimized code we produce a `br` instead of a `switch`. Compare with
12+
// `src/test/codegen/match-optimized.rs`
13+
14+
// CHECK-LABEL: @exhaustive_match_2
15+
#[no_mangle]
16+
pub fn exhaustive_match_2(e: E2) -> u8 {
17+
// CHECK: %[[CMP:.+]] = icmp eq i16 %{{.+}}, 13
18+
// CHECK-NEXT: br i1 %[[CMP:.+]],
19+
match e {
20+
E2::A => 0,
21+
E2::B => 1,
22+
}
23+
}

src/test/codegen/match.rs

-29
This file was deleted.

0 commit comments

Comments
 (0)